Skip to content

Commit

Permalink
initial fix of issue 67, need to test further
Browse files Browse the repository at this point in the history
  • Loading branch information
SiberiaWolfP committed Feb 8, 2024
1 parent 9b64863 commit cfe0649
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 3 deletions.
34 changes: 31 additions & 3 deletions duckpgq/src/duckpgq/functions/scalar/iterativelength.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
auto src_data = (int64_t *)vdata_src.data;
auto dst_data = (int64_t *)vdata_dst.data;

// get lowerbound and upperbound
auto &lower_bound = args.data[4];
auto &upper_bound = args.data[5];
UnifiedVectorFormat vdata_lower_bound;
UnifiedVectorFormat vdata_upper_bound;
lower_bound.ToUnifiedFormat(args.size(), vdata_lower_bound);
upper_bound.ToUnifiedFormat(args.size(), vdata_upper_bound);
auto lower_bound_data = (int64_t *)vdata_lower_bound.data;
auto upper_bound_data = (int64_t *)vdata_upper_bound.data;

ValidityMask &result_validity = FlatVector::Validity(result);

// create result vector
Expand Down Expand Up @@ -115,6 +125,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
result_data[search_num] =
(uint64_t)0; // path of length 0 does not require a search
} else {
seen[src_data[src_pos]][lane] = true;
visit1[src_data[src_pos]][lane] = true;
lane_to_num[lane] = search_num; // active lane
active++;
Expand All @@ -134,9 +145,25 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
int64_t search_num = lane_to_num[lane];
if (search_num >= 0) { // active lane
int64_t dst_pos = vdata_dst.sel->get_index(search_num);
if (seen[dst_data[dst_pos]][lane]) {
result_data[search_num] =
iter; /* found at iter => iter = path length */
if (seen[dst_data[dst_pos]][lane]){

// check if the path length is within bounds
// bound vector is either a constant or a flat vector
if (lower_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ?
iter < lower_bound_data[0] : iter < lower_bound_data[dst_pos]) {
// when reach the destination too early, treat destination as null
// looks like the graph does not have that vertex
seen[dst_data[dst_pos]][lane] = false;
(iter & 1) ? visit2[dst_data[dst_pos]][lane] = false
: visit1[dst_data[dst_pos]][lane] = false;
continue;
} else if (upper_bound.GetVectorType() == VectorType::CONSTANT_VECTOR ?
iter > upper_bound_data[0] : iter > upper_bound_data[dst_pos]) {
result_data[search_num] = (int64_t)-1; /* no path */
} else {
result_data[search_num] =
iter; /* found at iter => iter = path length */
}
lane_to_num[lane] = -1; // mark inactive
active--;
}
Expand All @@ -160,6 +187,7 @@ static void IterativeLengthFunction(DataChunk &args, ExpressionState &state,
CreateScalarFunctionInfo DuckPGQFunctions::GetIterativeLengthFunction() {
auto fun = ScalarFunction("iterativelength",
{LogicalType::INTEGER, LogicalType::BIGINT,
LogicalType::BIGINT, LogicalType::BIGINT,
LogicalType::BIGINT, LogicalType::BIGINT},
LogicalType::BIGINT, IterativeLengthFunction,
IterativeLengthFunctionData::IterativeLengthBind);
Expand Down
37 changes: 37 additions & 0 deletions scripts/kuzu_shortest_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import kuzu
import pandas as pd

# Reference-data generator: loads the Person / knows data into Kuzu and records,
# for every (lower, upper) hop-bound pair, the shortest path length Kuzu finds
# between each pair of people. The result is written to a CSV file.

# Open the on-disk Kuzu database at ./test and a connection to it.
db = kuzu.Database('./test')
conn = kuzu.Connection(db)

# Drop the tables if they exist (knows before Person, the node table it
# connects). A failing DROP is deliberately ignored so the script also works
# when the tables are not there yet; only Exception is caught so that
# KeyboardInterrupt / SystemExit still stop the script.
for table_name in ("knows", "Person"):
    try:
        conn.execute("DROP TABLE %s" % table_name)
    except Exception:
        pass

# Define the schema:
conn.execute("CREATE NODE TABLE Person (creationDate TIMESTAMP, id INT64, firstName STRING, lastName STRING, gender STRING, birthday DATE, locationIP STRING, browserUsed STRING, LocationCityId INT64, speaks STRING, email STRING, PRIMARY KEY (id))")
conn.execute("CREATE REL TABLE knows (FROM Person TO Person)")

# Load the data:
conn.execute("Copy Person FROM './test/person.csv'")
conn.execute("Copy knows FROM './test/person_knows_person.csv'")

# Calculate the shortest path between two people with bounded distance.
# Every pair low <= high in [MIN_DISTANCE, MAX_DISTANCE] is queried.
# NOTE(review): the sweep starts at a lower bound of 0 -- confirm the Kuzu
# version in use accepts `*0..n` in a variable-length relationship pattern.
MIN_DISTANCE = 0
MAX_DISTANCE = 30
frames = []
for low in range(MIN_DISTANCE, MAX_DISTANCE + 1):
    for high in range(low, MAX_DISTANCE + 1):
        result = conn.execute("MATCH (a:Person)-[e:knows*%d..%d]->(b:Person) RETURN a.id, b.id, length(e) AS distance ORDER BY distance ASC" % (low, high)).get_as_df()
        # Rows arrive ordered by distance, so keeping the first row per
        # (a.id, b.id) keeps the shortest path length within the bounds.
        result = result.drop_duplicates(subset=['a.id', 'b.id'], keep='first')
        result['min_distance'] = low
        result['max_distance'] = high
        frames.append(result)

# Concatenate once at the end instead of growing a DataFrame inside the loop.
results = pd.concat(frames, ignore_index=True)

results.to_csv('./test/shortest_length_kuzu.csv', index=False)
199 changes: 199 additions & 0 deletions test/sql/path-finding/shortest_path_bound.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# name: test/sql/path-finding/shortest_path_bound.test
# group: [path-finding]

# Turn on DuckDB's query verification for every statement in this file.
statement ok
pragma enable_verification

# The remainder of the file depends on the duckpgq extension.
require duckpgq

# Graph to test regular shortest path bound
# (0) -> (1)
#  ↓      ↑
# (2) -> (3)

# Vertex table: four points with ids 0..3.
statement ok
CREATE TABLE Point(id BIGINT); INSERT INTO Point VALUES (0), (1), (2), (3);

# Directed edges: 0->1, 0->2, 2->3, 3->1.
statement ok
CREATE TABLE know(src BIGINT, dst BIGINT); INSERT INTO know VALUES (0, 1), (0, 2), (2, 3), (3, 1);

# Register Point/know as property graph pg. The queries visible in this file
# read the Point and know tables directly and never refer to pg by name.
statement ok
-CREATE PROPERTY GRAPH pg
VERTEX TABLES (
Point PROPERTIES ( id ) LABEL Pnt
)
EDGE TABLES (
know SOURCE KEY ( src ) REFERENCES Point ( id )
DESTINATION KEY ( dst ) REFERENCES Point ( id )
LABEL Knows
);

# Path lengths from vertex 0 on the Point/know graph with bounds [2, 3]
# (the last two arguments of iterativelength are the lower and upper bound).
#   0 -> 1: the direct edge has length 1, below the lower bound, so the
#           expected length is 3 (0->2->3->1).
#   0 -> 3: length 2 (0->2->3).
#   0 -> 2: only reachable at length 1, so no row is expected.
# cte1 calls CREATE_CSR_VERTEX / CREATE_CSR_EDGE over Point and Know.
# NOTE(review): the __x cross join and the "__x.temp * 0 +" term look like a
# device to make the main query depend on cte1 -- confirm before reshaping.
query III
WITH cte1 AS (
SELECT CREATE_CSR_EDGE(
0,
(SELECT count(a.id) FROM Point a),
CAST (
(SELECT sum(CREATE_CSR_VERTEX(
0,
(SELECT count(a.id) FROM Point a),
sub.dense_id,
sub.cnt)
)
FROM (
SELECT a.rowid as dense_id, count(k.src) as cnt
FROM Point a
LEFT JOIN Know k ON k.src = a.id
GROUP BY a.rowid) sub
)
AS BIGINT),
a.rowid,
c.rowid,
k.rowid) as temp
FROM Know k
JOIN Point a on a.id = k.src
JOIN Point c on c.id = k.dst
) SELECT a.id as src_id, b.id as dst_id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3) as path_length
FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 2, 3);
----
0 1 3
0 3 2

# Same Point/know graph, bounds [1, 3] (last two arguments of iterativelength).
# With a lower bound of 1 every vertex reachable from 0 is expected at its
# first-found length: 1 and 2 at length 1, 3 at length 2 (0->2->3).
# NOTE(review): the __x cross join and the "__x.temp * 0 +" term look like a
# device to make the main query depend on cte1 -- confirm before reshaping.
query III
WITH cte1 AS (
SELECT CREATE_CSR_EDGE(
0,
(SELECT count(a.id) FROM Point a),
CAST (
(SELECT sum(CREATE_CSR_VERTEX(
0,
(SELECT count(a.id) FROM Point a),
sub.dense_id,
sub.cnt)
)
FROM (
SELECT a.rowid as dense_id, count(k.src) as cnt
FROM Point a
LEFT JOIN Know k ON k.src = a.id
GROUP BY a.rowid) sub
)
AS BIGINT),
a.rowid,
c.rowid,
k.rowid) as temp
FROM Know k
JOIN Point a on a.id = k.src
JOIN Point c on c.id = k.dst
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3) as path_length
FROM Point a, Point b, (select count(cte1.temp) * 0 as temp from cte1) __x
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point), a.rowid, b.rowid, 1, 3);
----
0 1 1
0 2 1
0 3 2

# Graph to test shortest path bound with a cycle
# (0) --> (1) <-> (2)
# 0 to 1 is 1 hop

# Vertex table: three points with ids 0..2.
statement ok
CREATE TABLE Point2(id BIGINT); INSERT INTO Point2 VALUES (0), (1), (2);

# Directed edges: 0->1, 1->2, 2->1 (vertices 1 and 2 form a two-edge cycle).
statement ok
CREATE TABLE know2(src BIGINT, dst BIGINT); INSERT INTO know2 VALUES (0, 1), (1, 2), (2, 1);

# Register Point2/know2 as property graph pg2 with its own label names.
statement ok
-CREATE PROPERTY GRAPH pg2
VERTEX TABLES (
Point2 PROPERTIES ( id ) LABEL Pnt2
)
EDGE TABLES (
know2 SOURCE KEY ( src ) REFERENCES Point2 ( id )
DESTINATION KEY ( dst ) REFERENCES Point2 ( id )
LABEL Knows2
);

# Point2/know2 graph, start vertex 0, bounds [2, 3].
# Expected: only vertex 2, at length 2 (0->1->2). Vertex 1 is first reached at
# length 1, which is below the lower bound, and no row is expected for it.
# NOTE(review): the walk 0->1->2->1 has length 3 and lies inside the bounds;
# the expected output excludes it -- confirm this is the intended semantics.
query III
WITH cte1 AS (
SELECT CREATE_CSR_EDGE(
0,
(SELECT count(a.id) FROM Point2 a),
CAST (
(SELECT sum(CREATE_CSR_VERTEX(
0,
(SELECT count(a.id) FROM Point2 a),
sub.dense_id,
sub.cnt)
)
FROM (
SELECT a.rowid as dense_id, count(k.src) as cnt
FROM Point2 a
LEFT JOIN know2 k ON k.src = a.id
GROUP BY a.rowid) sub
)
AS BIGINT),
a.rowid,
c.rowid,
k.rowid) as temp
FROM know2 k
JOIN Point2 a on a.id = k.src
JOIN Point2 c on c.id = k.dst
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3) as path_length
FROM Point2 a, Point2 b, (select count(cte1.temp) * 0 as temp from cte1) __x
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point2), a.rowid, b.rowid, 2, 3);
----
0 2 2


# Graph to test shortest path bound with a cycle
# (1) <- (0) <-> (2)
# 0 to 1 is 1 hop

# Vertex table: three points with ids 0..2.
statement ok
CREATE TABLE Point3(id BIGINT); INSERT INTO Point3 VALUES (0), (1), (2);

# Directed edges: 0->1, 0->2, 2->0 (vertices 0 and 2 form a two-edge cycle).
statement ok
CREATE TABLE know3(src BIGINT, dst BIGINT); INSERT INTO know3 VALUES (0, 1), (0, 2), (2, 0);

# Register Point3/know3 as property graph pg3. It uses the label names
# Pnt/Knows, the same ones pg uses (pg2 uses Pnt2/Knows2).
statement ok
-CREATE PROPERTY GRAPH pg3
VERTEX TABLES (
Point3 PROPERTIES ( id ) LABEL Pnt
)
EDGE TABLES (
know3 SOURCE KEY ( src ) REFERENCES Point3 ( id )
DESTINATION KEY ( dst ) REFERENCES Point3 ( id )
LABEL Knows
);

# Point3/know3 graph, start vertex 0, bounds [2, 3].
# Expected: no rows. Vertices 1 and 2 are both first reached from 0 at
# length 1, which is below the lower bound.
# NOTE(review): walks inside the bounds do exist through the 0<->2 cycle
# (0->2->0 has length 2, 0->2->0->1 has length 3); the expected output
# excludes them -- confirm this is the intended semantics.
query III
WITH cte1 AS (
SELECT CREATE_CSR_EDGE(
0,
(SELECT count(a.id) FROM Point3 a),
CAST (
(SELECT sum(CREATE_CSR_VERTEX(
0,
(SELECT count(a.id) FROM Point3 a),
sub.dense_id,
sub.cnt)
)
FROM (
SELECT a.rowid as dense_id, count(k.src) as cnt
FROM Point3 a
LEFT JOIN know3 k ON k.src = a.id
GROUP BY a.rowid) sub
)
AS BIGINT),
a.rowid,
c.rowid,
k.rowid) as temp
FROM know3 k
JOIN Point3 a on a.id = k.src
JOIN Point3 c on c.id = k.dst
) SELECT a.id, b.id, iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3) as path_length
FROM Point3 a, Point3 b, (select count(cte1.temp) * 0 as temp from cte1) __x
WHERE a.id = 0 and __x.temp * 0 + iterativelength(0, (select count(*) from Point3), a.rowid, b.rowid, 2, 3);
----

0 comments on commit cfe0649

Please sign in to comment.