From ab76b93014e23b1b7a8f5669d588b01c948366db Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 11 Jan 2024 09:45:47 +0100 Subject: [PATCH 01/47] Remove test from other branch --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index f484fe08..7aafafca 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit f484fe0899e67fc29b6aec7ef1734925dca7ef45 +Subproject commit 7aafafcabc08298ea907c6059ad281d081dd0c6f From 39082085fb9a6412f5cc23fad2adca6a713891fe Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 11 Jan 2024 10:52:55 +0100 Subject: [PATCH 02/47] Adding test --- test/sql/path-finding/complex_matching.test | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index e2eb792d..25bc56a4 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -55,4 +55,11 @@ query II COLUMNS (p.id as p_id, p2.id as p2_id, t.id) ) tmp limit 10; ----- \ No newline at end of file +---- + + +statement ok +-FROM GRAPH_TABLE (snb + MATCH p = ANY SHORTEST (a:Person where a.id = 28587302322180)-[k:knows]->{1,3}(b:Person) + COLUMNS (a.id) + ) tmp; From 6278c8afdc8111b1f20bf755456835c9b04cad51 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 11 Jan 2024 11:11:12 +0100 Subject: [PATCH 03/47] Small refactor --- duckpgq/src/duckpgq_extension.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 5cc1404d..2e5b2428 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -89,16 +89,18 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, OperatorExtensionInfo *info, SQLStatement &statement) { auto lookup = context.registered_state.find("duckpgq"); - if (lookup != context.registered_state.end()) { - 
auto duckpgq_state = (DuckPGQState *)lookup->second.get(); - auto duckpgq_binder = Binder::CreateBinder(context); - auto duckpgq_parse_data = - dynamic_cast(duckpgq_state->parse_data.get()); - if (duckpgq_parse_data) { - return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); - } - } - throw BinderException("Registered state not found"); + if (lookup == context.registered_state.end()) { + throw BinderException("Registered state not found"); + } + + auto duckpgq_state = (DuckPGQState *)lookup->second.get(); + auto duckpgq_binder = Binder::CreateBinder(context); + auto duckpgq_parse_data = + dynamic_cast(duckpgq_state->parse_data.get()); + if (duckpgq_parse_data) { + return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); + } + throw BinderException("Unable to find DuckPGQ Parse Data"); } ParserExtensionPlanResult From b6492b31ce4909313c4eae3c1ba7267f7d028d83 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 11:10:16 +0100 Subject: [PATCH 04/47] Remove condition from getPathElement --- .../functions/tablefunctions/match.hpp | 11 +- .../functions/tablefunctions/match.cpp | 549 +++++++++--------- 2 files changed, 278 insertions(+), 282 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 3576bc00..0b35a3d9 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -42,8 +42,7 @@ struct PGQMatchFunction : public TableFunction { const string &vertex_alias, const string &edge_alias); static PathElement * - GetPathElement(unique_ptr &path_reference, - vector> &conditions); + GetPathElement(unique_ptr &path_reference); static unique_ptr GetCountTable(const shared_ptr &edge_table, @@ -89,10 +88,10 @@ struct PGQMatchFunction : public TableFunction { vector> &conditions, unordered_map &alias_map, int32_t &extra_alias_counter); - static PathElement * - 
HandleNestedSubPath(unique_ptr &path_reference, - vector> &conditions, - idx_t element_idx); +// static PathElement * +// HandleNestedSubPath(unique_ptr &path_reference, +// vector> &conditions, +// idx_t element_idx); static unique_ptr MatchBindReplace(ClientContext &context, TableFunctionBindInput &input); diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 49494c6c..f1294b76 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -24,6 +24,8 @@ #include "duckdb/parser/property_graph_table.hpp" #include "duckdb/parser/subpath_element.hpp" +#include + namespace duckdb { @@ -126,26 +128,14 @@ unique_ptr PGQMatchFunction::CreateMatchJoinExpression( } PathElement *PGQMatchFunction::GetPathElement( - unique_ptr &path_reference, - vector> &conditions) { + unique_ptr &path_reference) { if (path_reference->path_reference_type == PGQPathReferenceType::PATH_ELEMENT) { return reinterpret_cast(path_reference.get()); } else if (path_reference->path_reference_type == - PGQPathReferenceType::SUBPATH) { - auto subpath = reinterpret_cast(path_reference.get()); - - if (subpath->where_clause) { - conditions.push_back(std::move(subpath->where_clause)); - } - // If the subpath has only one element (the case when there is a WHERE in - // the element) we unpack the subpath into a PathElement. 
- if (subpath->path_list.size() == 1) { - return reinterpret_cast(subpath->path_list[0].get()); - } else { - return nullptr; - } - } else { + PGQPathReferenceType::SUBPATH) { + return nullptr; + } else { throw InternalException("Unknown path reference type detected"); } } @@ -475,12 +465,12 @@ void PGQMatchFunction::EdgeTypeLeftRight( conditions.push_back(std::move(combined_expr)); } -PathElement *PGQMatchFunction::HandleNestedSubPath( - unique_ptr &path_reference, - vector> &conditions, idx_t element_idx) { - auto subpath = reinterpret_cast(path_reference.get()); - return GetPathElement(subpath->path_list[element_idx], conditions); -} +//PathElement *PGQMatchFunction::HandleNestedSubPath( +// unique_ptr &path_reference, +// vector> &conditions, idx_t element_idx) { +// auto subpath = reinterpret_cast(path_reference.get()); +// return GetPathElement(subpath->path_list[element_idx], conditions); +//} unique_ptr CreateWhereClause(vector> &conditions) { @@ -516,6 +506,15 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( std::move(pathfinding_children)); } +void UnnestSubpath(unique_ptr &subpath, + vector> &conditions, + unique_ptr &from_clause) { + auto path_element = + reinterpret_cast(subpath.get()); + std::cout << path_element->path_variable; + +} + unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &) { auto data = make_uniq(); @@ -534,28 +533,29 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, unique_ptr from_clause; int32_t extra_alias_counter = 0; - bool path_finding = false; for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { auto &path_pattern = ref->path_patterns[idx_i]; // Check if the element is PathElement or a Subpath with potentially many // items PathElement *previous_vertex_element = - GetPathElement(path_pattern->path_elements[0], conditions); + GetPathElement(path_pattern->path_elements[0]); if (!previous_vertex_element) { - auto subpath_pattern_subquery = 
GenerateSubpathPatternSubquery( - path_pattern, pg_table, ref->column_list, named_subpaths); - if (from_clause) { - // The from clause already contains TableRefs, so we need to make a join - // with the subquery - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(from_clause); - from_join->right = std::move(subpath_pattern_subquery); - from_clause = std::move(from_join); - } else { - // The from clause was still empty, so we can just place the subquery - // there - from_clause = std::move(subpath_pattern_subquery); - } + UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); + +// auto subpath_pattern_subquery = GenerateSubpathPatternSubquery( +// path_pattern, pg_table, ref->column_list, named_subpaths); +// if (from_clause) { +// // The from clause already contains TableRefs, so we need to make a join +// // with the subquery +// auto from_join = make_uniq(JoinRefType::CROSS); +// from_join->left = std::move(from_clause); +// from_join->right = std::move(subpath_pattern_subquery); +// from_clause = std::move(from_join); +// } else { +// // The from clause was still empty, so we can just place the subquery +// // there +// from_clause = std::move(subpath_pattern_subquery); +// } } else { auto previous_vertex_table = FindGraphTable(previous_vertex_element->label, *pg_table); @@ -568,19 +568,19 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, idx_j < ref->path_patterns[idx_i]->path_elements.size(); idx_j = idx_j + 2) { PathElement *edge_element = - GetPathElement(path_pattern->path_elements[idx_j], conditions); + GetPathElement(path_pattern->path_elements[idx_j]); if (!edge_element) { auto subpath = - reinterpret_cast(path_pattern->path_elements[0].get()); - edge_element = GetPathElement(subpath->path_list[idx_j], conditions); + reinterpret_cast(path_pattern->path_elements[idx_j].get()); + edge_element = GetPathElement(subpath->path_list[idx_j]); } PathElement *next_vertex_element = - 
GetPathElement(path_pattern->path_elements[idx_j + 1], conditions); + GetPathElement(path_pattern->path_elements[idx_j + 1]); if (!next_vertex_element) { auto subpath = reinterpret_cast(path_pattern->path_elements[0].get()); next_vertex_element = - GetPathElement(subpath->path_list[idx_j + 1], conditions); + GetPathElement(subpath->path_list[idx_j + 1]); } if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { @@ -598,11 +598,9 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, auto *subpath = reinterpret_cast( path_pattern->path_elements[idx_j].get()); if (subpath->upper > 1) { - path_finding = true; //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, - //! dst b + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b select_node->cte_map.map["cte1"] = CreateCSRCTE( edge_table, previous_vertex_element->variable_binding, edge_element->variable_binding, @@ -692,7 +690,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; alias_map[edge_element->variable_binding] = edge_table->table_name; - if (!path_finding) { + +// if (!path_finding) { switch (edge_element->match_type) { case PGQMatchType::MATCH_EDGE_ANY: { select_node->modifiers.push_back(make_uniq()); @@ -722,11 +721,10 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, alias_map, extra_alias_counter); break; } - default: throw InternalException("Unknown match type found"); } - } +// } previous_vertex_element = next_vertex_element; previous_vertex_table = next_vertex_table; @@ -743,23 +741,23 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, } } - if (!path_finding) { +// if (!path_finding) { // Go through all aliases encountered - for (auto &table_alias_entry : alias_map) { - auto table_ref = make_uniq(); - 
table_ref->table_name = table_alias_entry.second; - table_ref->alias = table_alias_entry.first; - - if (from_clause) { - auto new_root = make_uniq(JoinRefType::CROSS); - new_root->left = std::move(from_clause); - new_root->right = std::move(table_ref); - from_clause = std::move(new_root); - } else { - from_clause = std::move(table_ref); - } - } - } + for (auto &table_alias_entry : alias_map) { + auto table_ref = make_uniq(); + table_ref->table_name = table_alias_entry.second; + table_ref->alias = table_alias_entry.first; + + if (from_clause) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(from_clause); + new_root->right = std::move(table_ref); + from_clause = std::move(new_root); + } else { + from_clause = std::move(table_ref); + } + } +// } select_node->from_table = std::move(from_clause); if (ref->where_clause) { @@ -825,215 +823,214 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, return std::move(result); } - -unique_ptr PGQMatchFunction::GenerateSubpathPatternSubquery( - unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, - vector> &column_list, - unordered_set &named_subpaths) { - vector> conditions; - auto path_element = - reinterpret_cast(path_pattern->path_elements[0].get()); - auto select_node = make_uniq(); - unordered_map alias_map; - string named_subpath = path_element->path_variable; - named_subpaths.insert(named_subpath); - int32_t extra_alias_counter = 0; - bool path_finding = false; - auto previous_vertex_element = - GetPathElement(path_element->path_list[0], conditions); - auto previous_vertex_table = - FindGraphTable(previous_vertex_element->label, *pg_table); - CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); - alias_map[previous_vertex_element->variable_binding] = - previous_vertex_table->table_name; - for (idx_t idx_j = 1; idx_j < path_element->path_list.size(); - idx_j = idx_j + 2) { - PathElement *edge_element = - 
GetPathElement(path_element->path_list[idx_j], conditions); - PathElement *next_vertex_element = - GetPathElement(path_element->path_list[idx_j + 1], conditions); - if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - throw BinderException("Vertex and edge patterns must be alternated."); - } - - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - CheckInheritance(edge_table, edge_element, conditions); - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, *pg_table); - CheckInheritance(next_vertex_table, next_vertex_element, conditions); - - if (path_element->path_list[idx_j]->path_reference_type == - PGQPathReferenceType::SUBPATH) { - auto *subpath = - reinterpret_cast(path_element->path_list[idx_j].get()); - if (subpath->upper > 1) { - path_finding = true; - if (!named_subpath.empty() && path_pattern->shortest) { - // todo(dtenwolde) does not necessarily have to be a shortest path - // query if it is a named subpath. It can also be a basic pattern - // matching that is named. - auto shortest_path_function = CreatePathFindingFunction( - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, edge_table, - "shortestpath"); - shortest_path_function->alias = "path"; - select_node->select_list.push_back(std::move(shortest_path_function)); - } - select_node->cte_map.map["cte1"] = - CreateCSRCTE(edge_table, previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding); - - //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x - auto temp_cte_select_subquery = CreateCountCTESubquery(); - - auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); - - //! 
src alias (FROM src a) - auto src_vertex_ref = make_uniq(); - src_vertex_ref->table_name = edge_table->source_reference; - src_vertex_ref->alias = previous_vertex_element->variable_binding; - - cross_join_src_dst->left = std::move(src_vertex_ref); - - //! dst alias (FROM dst b) - auto dst_vertex_ref = make_uniq(); - dst_vertex_ref->table_name = edge_table->destination_reference; - dst_vertex_ref->alias = next_vertex_element->variable_binding; - - cross_join_src_dst->right = std::move(dst_vertex_ref); - - auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - cross_join_with_cte->left = std::move(temp_cte_select_subquery); - cross_join_with_cte->right = std::move(cross_join_src_dst); - - if (select_node->from_table) { - // create a cross join since there is already something in the from - // clause - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(select_node->from_table); - from_join->right = std::move(cross_join_with_cte); - select_node->from_table = std::move(from_join); - } else { - select_node->from_table = std::move(cross_join_with_cte); - } - //! END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst - //! b - - //! START - //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) from - //! 
dst c, a.rowid, b.rowid) between lower and upper - auto reachability_function = - CreatePathFindingFunction(previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, - edge_table, "iterativelength"); - - auto cte_col_ref = make_uniq("temp", "__x"); - - vector> addition_children; - addition_children.push_back(std::move(cte_col_ref)); - addition_children.push_back(std::move(reachability_function)); - - auto addition_function = - make_uniq("add", std::move(addition_children)); - auto lower_limit = - make_uniq(Value::BIGINT(subpath->lower)); - auto upper_limit = - make_uniq(Value::BIGINT(subpath->upper)); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - conditions.push_back(std::move(between_expression)); - - //! END - //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) from - //! src s, a.rowid, b.rowid) between lower and upper - } - // check aliases - alias_map[next_vertex_element->variable_binding] = - next_vertex_table->table_name; - alias_map[edge_element->variable_binding] = edge_table->table_name; - if (!path_finding) { - switch (edge_element->match_type) { - case PGQMatchType::MATCH_EDGE_ANY: { - select_node->modifiers.push_back(make_uniq()); - EdgeTypeAny(edge_table, edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - } - case PGQMatchType::MATCH_EDGE_LEFT: - EdgeTypeLeft(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_RIGHT: - EdgeTypeRight(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - 
case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - EdgeTypeLeftRight(edge_table, edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, - alias_map, extra_alias_counter); - break; - } - default: - throw InternalException("Unknown match type found"); - } - } - previous_vertex_element = next_vertex_element; - previous_vertex_table = next_vertex_table; - } - } - - select_node->where_clause = CreateWhereClause(conditions); - vector> substitute_column_list; - for (auto &expression : column_list) { - const auto &column_ref = - dynamic_cast(expression.get()); - if (column_ref == nullptr) { - continue; - } - // If the table is referenced in this subquery (count() > 0) - if (alias_map.count(column_ref->column_names[0])) { - select_node->select_list.push_back(std::move(expression)); - // Create a substitute - unique_ptr new_upper_column_ref; - if (column_ref->alias.empty()) { - new_upper_column_ref = make_uniq( - column_ref->column_names[1], named_subpath); - } else { - new_upper_column_ref = - make_uniq(column_ref->alias, named_subpath); - } - new_upper_column_ref->alias = column_ref->alias; - substitute_column_list.push_back(std::move(new_upper_column_ref)); - } - } - // Remove the elements from the original column_list that are now NULL - for (auto it = column_list.begin(); it != column_list.end();) { - if (!*it) { - it = column_list.erase(it); - } else { - ++it; - } - } - // Add the ColumnRefs that were previously moved to the subquery with the - // subquery name as table_name - for (auto &expression : substitute_column_list) { - column_list.push_back(std::move(expression)); - } - auto subquery = make_uniq(); - subquery->node = std::move(select_node); - - return make_uniq(std::move(subquery), named_subpath); -} +// +//unique_ptr PGQMatchFunction::GenerateSubpathPatternSubquery( +// unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, +// vector> &column_list, +// unordered_set &named_subpaths) 
{ +// vector> conditions; +// auto path_element = +// reinterpret_cast(path_pattern->path_elements[0].get()); +// auto select_node = make_uniq(); +// unordered_map alias_map; +// string named_subpath = path_element->path_variable; +// named_subpaths.insert(named_subpath); +// int32_t extra_alias_counter = 0; +// bool path_finding = false; +// auto previous_vertex_element = +// GetPathElement(path_element->path_list[0], conditions); +// auto previous_vertex_table = +// FindGraphTable(previous_vertex_element->label, *pg_table); +// CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); +// alias_map[previous_vertex_element->variable_binding] = +// previous_vertex_table->table_name; +// for (idx_t idx_j = 1; idx_j < path_element->path_list.size(); +// idx_j = idx_j + 2) { +// PathElement *edge_element = +// GetPathElement(path_element->path_list[idx_j], conditions); +// PathElement *next_vertex_element = +// GetPathElement(path_element->path_list[idx_j + 1], conditions); +// if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || +// previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { +// throw BinderException("Vertex and edge patterns must be alternated."); +// } +// +// auto edge_table = FindGraphTable(edge_element->label, *pg_table); +// CheckInheritance(edge_table, edge_element, conditions); +// auto next_vertex_table = +// FindGraphTable(next_vertex_element->label, *pg_table); +// CheckInheritance(next_vertex_table, next_vertex_element, conditions); +// +// if (path_element->path_list[idx_j]->path_reference_type == +// PGQPathReferenceType::SUBPATH) { +// auto *subpath = +// reinterpret_cast(path_element->path_list[idx_j].get()); +// if (subpath->upper > 1) { +// path_finding = true; +// if (!named_subpath.empty() && path_pattern->shortest) { +// // todo(dtenwolde) does not necessarily have to be a shortest path +// // query if it is a named subpath. It can also be a basic pattern +// // matching that is named. 
+// auto shortest_path_function = CreatePathFindingFunction( +// previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, edge_table, +// "shortestpath"); +// shortest_path_function->alias = "path"; +// select_node->select_list.push_back(std::move(shortest_path_function)); +// } +// select_node->cte_map.map["cte1"] = +// CreateCSRCTE(edge_table, previous_vertex_element->variable_binding, +// edge_element->variable_binding, +// next_vertex_element->variable_binding); +// +// //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x +// auto temp_cte_select_subquery = CreateCountCTESubquery(); +// +// auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); +// +// //! src alias (FROM src a) +// auto src_vertex_ref = make_uniq(); +// src_vertex_ref->table_name = edge_table->source_reference; +// src_vertex_ref->alias = previous_vertex_element->variable_binding; +// +// cross_join_src_dst->left = std::move(src_vertex_ref); +// +// //! dst alias (FROM dst b) +// auto dst_vertex_ref = make_uniq(); +// dst_vertex_ref->table_name = edge_table->destination_reference; +// dst_vertex_ref->alias = next_vertex_element->variable_binding; +// +// cross_join_src_dst->right = std::move(dst_vertex_ref); +// +// auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); +// cross_join_with_cte->left = std::move(temp_cte_select_subquery); +// cross_join_with_cte->right = std::move(cross_join_src_dst); +// +// if (select_node->from_table) { +// // create a cross join since there is already something in the from +// // clause +// auto from_join = make_uniq(JoinRefType::CROSS); +// from_join->left = std::move(select_node->from_table); +// from_join->right = std::move(cross_join_with_cte); +// select_node->from_table = std::move(from_join); +// } else { +// select_node->from_table = std::move(cross_join_with_cte); +// } +// //! END +// //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b +// +// //! START +// //! 
WHERE __x.temp + iterativelength(, (SELECT count(c.id) from +// //! dst c, a.rowid, b.rowid) between lower and upper +// auto reachability_function = +// CreatePathFindingFunction(previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, +// edge_table, "iterativelength"); +// +// auto cte_col_ref = make_uniq("temp", "__x"); +// +// vector> addition_children; +// addition_children.push_back(std::move(cte_col_ref)); +// addition_children.push_back(std::move(reachability_function)); +// +// auto addition_function = +// make_uniq("add", std::move(addition_children)); +// auto lower_limit = +// make_uniq(Value::BIGINT(subpath->lower)); +// auto upper_limit = +// make_uniq(Value::BIGINT(subpath->upper)); +// auto between_expression = make_uniq( +// std::move(addition_function), std::move(lower_limit), +// std::move(upper_limit)); +// conditions.push_back(std::move(between_expression)); +// +// //! END +// //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) from +// //! 
src s, a.rowid, b.rowid) between lower and upper +// } +// // check aliases +// alias_map[next_vertex_element->variable_binding] = +// next_vertex_table->table_name; +// alias_map[edge_element->variable_binding] = edge_table->table_name; +// if (!path_finding) { +// switch (edge_element->match_type) { +// case PGQMatchType::MATCH_EDGE_ANY: { +// select_node->modifiers.push_back(make_uniq()); +// EdgeTypeAny(edge_table, edge_element->variable_binding, +// previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, conditions); +// break; +// } +// case PGQMatchType::MATCH_EDGE_LEFT: +// EdgeTypeLeft(edge_table, next_vertex_table->table_name, +// previous_vertex_table->table_name, +// edge_element->variable_binding, +// previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, conditions); +// break; +// case PGQMatchType::MATCH_EDGE_RIGHT: +// EdgeTypeRight(edge_table, next_vertex_table->table_name, +// previous_vertex_table->table_name, +// edge_element->variable_binding, +// previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, conditions); +// break; +// case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { +// EdgeTypeLeftRight(edge_table, edge_element->variable_binding, +// previous_vertex_element->variable_binding, +// next_vertex_element->variable_binding, conditions, +// alias_map, extra_alias_counter); +// break; +// } +// default: +// throw InternalException("Unknown match type found"); +// } +// } +// previous_vertex_element = next_vertex_element; +// previous_vertex_table = next_vertex_table; +// } +// } +// +// select_node->where_clause = CreateWhereClause(conditions); +// vector> substitute_column_list; +// for (auto &expression : column_list) { +// const auto &column_ref = +// dynamic_cast(expression.get()); +// if (column_ref == nullptr) { +// continue; +// } +// // If the table is referenced in this subquery (count() > 0) +// if (alias_map.count(column_ref->column_names[0])) 
{ +// select_node->select_list.push_back(std::move(expression)); +// // Create a substitute +// unique_ptr new_upper_column_ref; +// if (column_ref->alias.empty()) { +// new_upper_column_ref = make_uniq( +// column_ref->column_names[1], named_subpath); +// } else { +// new_upper_column_ref = +// make_uniq(column_ref->alias, named_subpath); +// } +// new_upper_column_ref->alias = column_ref->alias; +// substitute_column_list.push_back(std::move(new_upper_column_ref)); +// } +// } +// // Remove the elements from the original column_list that are now NULL +// for (auto it = column_list.begin(); it != column_list.end();) { +// if (!*it) { +// it = column_list.erase(it); +// } else { +// ++it; +// } +// } +// // Add the ColumnRefs that were previously moved to the subquery with the +// // subquery name as table_name +// for (auto &expression : substitute_column_list) { +// column_list.push_back(std::move(expression)); +// } +// auto subquery = make_uniq(); +// subquery->node = std::move(select_node); +// +// return make_uniq(std::move(subquery), named_subpath); +//} } // namespace duckdb From 9176f9610e0d6d2bb0352dafebdabc4749314e5b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 11:10:43 +0100 Subject: [PATCH 05/47] Removing subquery from shortest_path.test --- test/sql/path-finding/complex_matching.test | 98 +++++++++++++++++++++ test/sql/path-finding/shortest_path.test | 5 +- 2 files changed, 100 insertions(+), 3 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 25bc56a4..6c023842 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,6 +49,102 @@ EDGE TABLES ( LABEL replyOf ); +query IIIII +WITH CTE1 AS (SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Person a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Person a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, 
count(k.person1id) as cnt + FROM Person a + LEFT JOIN Person_knows_Person k ON k.person1id = a.id + GROUP BY a.rowid) sub + ) + AS BIGINT), + a.rowid, + b.rowid, + k.rowid) as temp + FROM Person_knows_Person k + JOIN Person a on a.id = k.person1id + JOIN Person b on b.id = k.person2id) +SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, + a.firstname as a_name, + b.rowid as b_rowid, + b.id as b_id, + t.id as t_id +FROM Person a, + Person b, + person_hasInterest_Tag i, + Tag t, + (select count(cte1.temp) as temp from cte1) __x +WHERE a.id = 28587302322180 + and b.id = i.PersonId + and t.id = i.TagId + and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 +ORDER BY b_id, t_id +---- +[33, 77, 36] Bryn 36 28587302322204 6 +[33, 77, 36] Bryn 36 28587302322204 588 +[33, 77, 36] Bryn 36 28587302322204 1021 +[33, 77, 36] Bryn 36 28587302322204 1767 +[33, 77, 36] Bryn 36 28587302322204 1940 +[33, 77, 36] Bryn 36 28587302322204 1995 +[33, 77, 36] Bryn 36 28587302322204 2018 +[33, 77, 36] Bryn 36 28587302322204 5174 +[33, 77, 36] Bryn 36 28587302322204 6413 +[33, 77, 36] Bryn 36 28587302322204 7328 +[33, 77, 36] Bryn 36 28587302322204 9170 +[33, 77, 36] Bryn 36 28587302322204 11695 +[33, 77, 36] Bryn 36 28587302322204 12002 +[33, 78, 38] Bryn 38 28587302322223 775 +[33, 78, 38] Bryn 38 28587302322223 1938 +[33, 79, 39] Bryn 39 30786325577731 196 +[33, 79, 39] Bryn 39 30786325577731 1031 +[33, 80, 43] Bryn 43 32985348833329 3 +[33, 80, 43] Bryn 43 32985348833329 139 +[33, 80, 43] Bryn 43 32985348833329 470 +[33, 80, 43] Bryn 43 32985348833329 580 +[33, 80, 43] Bryn 43 32985348833329 1985 +[33, 80, 43] Bryn 43 32985348833329 2058 +[33, 80, 43] Bryn 43 32985348833329 2777 +[33, 80, 43] Bryn 43 32985348833329 2836 +[33, 80, 43] Bryn 43 32985348833329 5114 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 
+[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1749 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 + +query II +-FROM GRAPH_TABLE (snb + MATCH (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (p.id as p_id, p2.id as p2_id, t.id) + ) tmp + limit 10; +---- + + query II -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) @@ -58,6 +154,8 @@ query II ---- + + statement ok -FROM GRAPH_TABLE (snb MATCH p = ANY SHORTEST (a:Person where a.id = 28587302322180)-[k:knows]->{1,3}(b:Person) diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index ce6035f6..b249eae4 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -114,10 +114,9 @@ WITH cte1 AS ( FROM Know k JOIN student a on a.id = k.src JOIN student c on c.id = k.dst -) SELECT named_subpath.path as path, named_subpath.a_name as a_name, named_subpath.b_name as b_name -FROM (SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid) as path, a.name as a_name, b.name as b_name +) SELECT shortestpath(0, (select count(*) from student), a.rowid, b.rowid) as path, a.name as a_name, b.name as b_name 
FROM student a, student b, (select count(cte1.temp) * 0 as temp from cte1) __x - WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid) between 1 and 3) named_subpath + WHERE a.name = 'Daniel' and __x.temp * 0 + iterativelength(0, (select count(*) from student), a.rowid, b.rowid) between 1 and 3 ---- [0, 0, 1] Daniel Tavneet [0, 1, 2] Daniel Gabor From 5490f5fb43c454e97be2968a9287a11a5afd4ef5 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 12:15:44 +0100 Subject: [PATCH 06/47] Creating AddPathFinding function --- .../functions/tablefunctions/match.hpp | 10 + .../functions/tablefunctions/match.cpp | 246 ++++++++++-------- duckpgq/src/duckpgq_extension.cpp | 2 +- 3 files changed, 148 insertions(+), 110 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 0b35a3d9..05a2b62c 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -25,6 +25,8 @@ struct PGQMatchFunction : public TableFunction { bool done = false; }; + + static shared_ptr FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); static void @@ -106,5 +108,13 @@ struct PGQMatchFunction : public TableFunction { const string &next_binding, shared_ptr &edge_table, const string &path_finding_udf); + + + static void AddPathFinding(unique_ptr &select_node, + unique_ptr &from_clause, + vector> conditions, + const string &prev_binding, const string &edge_binding, const string &next_binding, + shared_ptr &edge_table, + SubPath* subpath); }; } // namespace duckdb \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index f1294b76..c7421a68 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ 
b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -515,17 +515,114 @@ void UnnestSubpath(unique_ptr &subpath, } +void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, + unique_ptr &from_clause, + vector> conditions, + const string &prev_binding, const string &edge_binding, const string &next_binding, + shared_ptr &edge_table, + SubPath* subpath) { + + //! START + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b + select_node->cte_map.map["cte1"] = CreateCSRCTE( + edge_table, prev_binding, + edge_binding, + next_binding); + + //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x + auto temp_cte_select_subquery = CreateCountCTESubquery(); + + auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); + + //! src alias (FROM src a) + auto src_vertex_ref = make_uniq(); + src_vertex_ref->table_name = edge_table->source_reference; + src_vertex_ref->alias = prev_binding; + + cross_join_src_dst->left = std::move(src_vertex_ref); + + //! dst alias (FROM dst b) + auto dst_vertex_ref = make_uniq(); + dst_vertex_ref->table_name = edge_table->destination_reference; + dst_vertex_ref->alias = next_binding; + + cross_join_src_dst->right = std::move(dst_vertex_ref); + + auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); + cross_join_with_cte->left = std::move(temp_cte_select_subquery); + cross_join_with_cte->right = std::move(cross_join_src_dst); + + if (from_clause) { + // create a cross join since there is already something in the + // from clause + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(from_clause); + from_join->right = std::move(cross_join_with_cte); + from_clause = std::move(from_join); + } else { + from_clause = std::move(cross_join_with_cte); + } + //! END + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, + //! dst b + + //! START + //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) + //! 
from dst c, a.rowid, b.rowid) between lower and upper + + auto src_row_id = make_uniq( + "rowid", prev_binding); + auto dst_row_id = make_uniq( + "rowid", next_binding); + auto csr_id = + make_uniq(Value::INTEGER((int32_t)0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back(std::move(GetCountTable( + edge_table, prev_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto reachability_function = make_uniq( + "iterativelength", std::move(pathfinding_children)); + + auto cte_col_ref = make_uniq("temp", "__x"); + + vector> addition_children; + addition_children.push_back(std::move(cte_col_ref)); + addition_children.push_back(std::move(reachability_function)); + + auto addition_function = make_uniq( + "add", std::move(addition_children)); + auto lower_limit = + make_uniq(Value::INTEGER(subpath.lower)); + auto upper_limit = + make_uniq(Value::INTEGER(subpath.upper)); + auto between_expression = make_uniq( + std::move(addition_function), std::move(lower_limit), + std::move(upper_limit)); + conditions.push_back(std::move(between_expression)); + + //! END + //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) + //! 
from src s, a.rowid, b.rowid) between lower and upper +} + + unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &) { - auto data = make_uniq(); auto duckpgq_state_entry = context.registered_state.find("duckpgq"); auto duckpgq_state = (DuckPGQState *)duckpgq_state_entry->second.get(); - auto ref = dynamic_cast( + auto ref = dynamic_cast( duckpgq_state->transform_expression.get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); - vector> conditions; + auto data = make_uniq(); + + + vector> conditions; auto select_node = make_uniq(); unordered_map alias_map; @@ -567,125 +664,56 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, for (idx_t idx_j = 1; idx_j < ref->path_patterns[idx_i]->path_elements.size(); idx_j = idx_j + 2) { - PathElement *edge_element = - GetPathElement(path_pattern->path_elements[idx_j]); - if (!edge_element) { - auto subpath = - reinterpret_cast(path_pattern->path_elements[idx_j].get()); - edge_element = GetPathElement(subpath->path_list[idx_j]); - } + PathElement *next_vertex_element = GetPathElement(path_pattern->path_elements[idx_j + 1]); if (!next_vertex_element) { - auto subpath = + auto next_vertex_subpath = reinterpret_cast(path_pattern->path_elements[0].get()); next_vertex_element = - GetPathElement(subpath->path_list[idx_j + 1]); + GetPathElement(next_vertex_subpath->path_list[idx_j + 1]); } if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { throw BinderException("Vertex and edge patterns must be alternated."); } - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - CheckInheritance(edge_table, edge_element, conditions); - auto next_vertex_table = + + PathElement *edge_element = + GetPathElement(path_pattern->path_elements[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + 
auto edge_subpath = reinterpret_cast(path_pattern->path_elements[idx_j].get()); + conditions.push_back(std::move(edge_subpath->where_clause)); + if (edge_subpath->path_list.size() > 1) { + // todo(dtenwolde) deal with multiple elements in subpath + throw NotImplementedException("Subpath with multiple elements is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, *pg_table); + + + if (edge_subpath->lower == 1 && edge_subpath->upper == 1) { + // No need to do path-finding + } else { + // Create CSR, add shortestpath to select, add iterativelength to where. + AddPathFinding(select_node, from_clause, conditions, + previous_vertex_element->variable_binding, + edge_element->variable_binding, + next_vertex_element->variable_binding, + edge_table, + edge_subpath); + } + } else { + // The edge element is a path element without WHERE or path-finding. + } + auto edge_table = FindGraphTable(edge_element->label, *pg_table); + CheckInheritance(edge_table, edge_element, conditions); + auto next_vertex_table = FindGraphTable(next_vertex_element->label, *pg_table); CheckInheritance(next_vertex_table, next_vertex_element, conditions); - if (path_pattern->path_elements[idx_j]->path_reference_type == - PGQPathReferenceType::SUBPATH) { - auto *subpath = reinterpret_cast( - path_pattern->path_elements[idx_j].get()); - if (subpath->upper > 1) { - - //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b - select_node->cte_map.map["cte1"] = CreateCSRCTE( - edge_table, previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding); - - //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x - auto temp_cte_select_subquery = CreateCountCTESubquery(); - - auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); - - //! 
src alias (FROM src a) - auto src_vertex_ref = make_uniq(); - src_vertex_ref->table_name = edge_table->source_reference; - src_vertex_ref->alias = previous_vertex_element->variable_binding; - - cross_join_src_dst->left = std::move(src_vertex_ref); - - //! dst alias (FROM dst b) - auto dst_vertex_ref = make_uniq(); - dst_vertex_ref->table_name = edge_table->destination_reference; - dst_vertex_ref->alias = next_vertex_element->variable_binding; - - cross_join_src_dst->right = std::move(dst_vertex_ref); - - auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - cross_join_with_cte->left = std::move(temp_cte_select_subquery); - cross_join_with_cte->right = std::move(cross_join_src_dst); - - if (from_clause) { - // create a cross join since there is already something in the - // from clause - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(from_clause); - from_join->right = std::move(cross_join_with_cte); - from_clause = std::move(from_join); - } else { - from_clause = std::move(cross_join_with_cte); - } - //! END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, - //! dst b - - //! START - //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) - //! 
from dst c, a.rowid, b.rowid) between lower and upper - - auto src_row_id = make_uniq( - "rowid", previous_vertex_element->variable_binding); - auto dst_row_id = make_uniq( - "rowid", next_vertex_element->variable_binding); - auto csr_id = - make_uniq(Value::INTEGER((int32_t)0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back(std::move(GetCountTable( - edge_table, previous_vertex_element->variable_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto reachability_function = make_uniq( - "iterativelength", std::move(pathfinding_children)); - - auto cte_col_ref = make_uniq("temp", "__x"); - - vector> addition_children; - addition_children.push_back(std::move(cte_col_ref)); - addition_children.push_back(std::move(reachability_function)); - - auto addition_function = make_uniq( - "add", std::move(addition_children)); - auto lower_limit = - make_uniq(Value::INTEGER(subpath->lower)); - auto upper_limit = - make_uniq(Value::INTEGER(subpath->upper)); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - conditions.push_back(std::move(between_expression)); - - //! END - //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! 
from src s, a.rowid, b.rowid) between lower and upper - } - } + // check aliases alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 2e5b2428..659913b8 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -120,7 +120,7 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, dynamic_cast(duckpgq_state->parse_data.get()); if (!duckpgq_parse_data) { - throw BinderException("Not DuckPGQ parse data"); + throw BinderException("No DuckPGQ parse data found"); } auto statement = dynamic_cast(duckpgq_parse_data->statement.get()); From 62acb9ed1116f99f2bda4f129532a59accc78125 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 12:15:58 +0100 Subject: [PATCH 07/47] Fix dot and arrow --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index c7421a68..44e719ab 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -596,9 +596,9 @@ void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, auto addition_function = make_uniq( "add", std::move(addition_children)); auto lower_limit = - make_uniq(Value::INTEGER(subpath.lower)); + make_uniq(Value::INTEGER(subpath->lower)); auto upper_limit = - make_uniq(Value::INTEGER(subpath.upper)); + make_uniq(Value::INTEGER(subpath->upper)); auto between_expression = make_uniq( std::move(addition_function), std::move(lower_limit), std::move(upper_limit)); From 5b9623637749cd1d7857f06898745c9ace1a75a7 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 13:40:45 +0100 Subject: [PATCH 08/47] Made stuff const --- .../functions/tablefunctions/match.hpp | 20 +++---- .../functions/tablefunctions/match.cpp 
| 60 +++++++++---------- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 05a2b62c..a05d5339 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -30,7 +30,7 @@ struct PGQMatchFunction : public TableFunction { static shared_ptr FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); static void - CheckInheritance(shared_ptr &tableref, + CheckInheritance(const shared_ptr &tableref, PathElement *element, vector> &conditions); @@ -44,7 +44,7 @@ struct PGQMatchFunction : public TableFunction { const string &vertex_alias, const string &edge_alias); static PathElement * - GetPathElement(unique_ptr &path_reference); + GetPathElement(const unique_ptr &path_reference); static unique_ptr GetCountTable(const shared_ptr &edge_table, @@ -62,13 +62,13 @@ struct PGQMatchFunction : public TableFunction { const string &edge_binding, const string &prev_binding, const string &next_binding); - static void EdgeTypeAny(shared_ptr &edge_table, + static void EdgeTypeAny(const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions); - static void EdgeTypeLeft(shared_ptr &edge_table, + static void EdgeTypeLeft(const shared_ptr &edge_table, const string &next_table_name, const string &prev_table_name, const string &edge_binding, @@ -76,7 +76,7 @@ struct PGQMatchFunction : public TableFunction { const string &next_binding, vector> &conditions); - static void EdgeTypeRight(shared_ptr &edge_table, + static void EdgeTypeRight(const shared_ptr &edge_table, const string &next_table_name, const string &prev_table_name, const string &edge_binding, @@ -85,7 +85,7 @@ struct PGQMatchFunction : public TableFunction { vector> &conditions); static void EdgeTypeLeftRight( - shared_ptr 
&edge_table, const string &edge_binding, + const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions, unordered_map &alias_map, int32_t &extra_alias_counter); @@ -106,15 +106,15 @@ struct PGQMatchFunction : public TableFunction { static unique_ptr CreatePathFindingFunction(const string &prev_binding, const string &next_binding, - shared_ptr &edge_table, + const shared_ptr &edge_table, const string &path_finding_udf); - static void AddPathFinding(unique_ptr &select_node, + static void AddPathFinding(const unique_ptr &select_node, unique_ptr &from_clause, vector> conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, - shared_ptr &edge_table, - SubPath* subpath); + const shared_ptr &edge_table, + const SubPath* subpath); }; } // namespace duckdb \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 44e719ab..50c928b8 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -3,14 +3,12 @@ #include "duckdb/parser/tableref/matchref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/expression/function_expression.hpp" #include "duckdb/parser/expression/subquery_expression.hpp" #include "duckdb/parser/expression/cast_expression.hpp" -#include "duckdb/parser/expression/operator_expression.hpp" #include "duckdb/parser/expression/between_expression.hpp" #include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/expression/comparison_expression.hpp" @@ -32,7 +30,7 @@ namespace duckdb { shared_ptr PGQMatchFunction::FindGraphTable(const string &label, CreatePropertyGraphInfo 
&pg_table) { - auto graph_table_entry = pg_table.label_map.find(label); + const auto graph_table_entry = pg_table.label_map.find(label); if (graph_table_entry == pg_table.label_map.end()) { throw BinderException("The label %s is not registered in property graph %s", label, pg_table.property_graph_name); @@ -42,19 +40,19 @@ PGQMatchFunction::FindGraphTable(const string &label, } void PGQMatchFunction::CheckInheritance( - shared_ptr &tableref, PathElement *element, + const shared_ptr &tableref, PathElement *element, vector> &conditions) { if (tableref->main_label == element->label) { return; } auto constant_expression_two = make_uniq(Value::INTEGER((int32_t)2)); - auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), + const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), element->label); - auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); + const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); auto constant_expression_idx_label = - make_uniq(Value::INTEGER((int32_t)idx_of_element)); + make_uniq(Value::INTEGER(static_cast(idx_of_element))); vector> power_of_children; power_of_children.push_back(std::move(constant_expression_two)); @@ -73,7 +71,7 @@ void PGQMatchFunction::CheckInheritance( make_uniq("&", std::move(and_children)); auto constant_expression_idx_label_comparison = make_uniq( - Value::INTEGER((int32_t)idx_of_element + 1)); + Value::INTEGER(static_cast(idx_of_element + 1))); auto subset_compare = make_uniq( ExpressionType::COMPARE_EQUAL, std::move(and_expression), @@ -128,7 +126,7 @@ unique_ptr PGQMatchFunction::CreateMatchJoinExpression( } PathElement *PGQMatchFunction::GetPathElement( - unique_ptr &path_reference) { + const unique_ptr &path_reference) { if (path_reference->path_reference_type == PGQPathReferenceType::PATH_ELEMENT) { return reinterpret_cast(path_reference.get()); @@ -243,11 +241,11 @@ unique_ptr PGQMatchFunction::CreateCountCTESubquery() { 
unique_ptr PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, - const string &edge_binding, - const string &prev_binding, - const string &next_binding) { + const string &prev_binding, + const string &edge_binding, + const string &next_binding) { auto csr_edge_id_constant = - make_uniq(Value::INTEGER((int32_t)0)); + make_uniq(Value::INTEGER(0)); auto count_create_edge_select = GetCountTable(edge_table, prev_binding); auto cast_subquery_expr = make_uniq(); @@ -366,7 +364,7 @@ PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, } void PGQMatchFunction::EdgeTypeAny( - shared_ptr &edge_table, const string &edge_binding, + const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions) { // (a) src.key = edge.src @@ -401,7 +399,7 @@ void PGQMatchFunction::EdgeTypeAny( } void PGQMatchFunction::EdgeTypeLeft( - shared_ptr &edge_table, const string &next_table_name, + const shared_ptr &edge_table, const string &next_table_name, const string &prev_table_name, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions) { @@ -415,7 +413,7 @@ void PGQMatchFunction::EdgeTypeLeft( } void PGQMatchFunction::EdgeTypeRight( - shared_ptr &edge_table, const string &next_table_name, + const shared_ptr &edge_table, const string &next_table_name, const string &prev_table_name, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions) { @@ -429,7 +427,7 @@ void PGQMatchFunction::EdgeTypeRight( } void PGQMatchFunction::EdgeTypeLeftRight( - shared_ptr &edge_table, const string &edge_binding, + const shared_ptr &edge_table, const string &edge_binding, const string &prev_binding, const string &next_binding, vector> &conditions, unordered_map &alias_map, int32_t &extra_alias_counter) { @@ -443,7 +441,7 @@ void PGQMatchFunction::EdgeTypeLeftRight( ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), 
std::move(dst_left_expr)); - auto additional_edge_alias = + const auto additional_edge_alias = edge_binding + std::to_string(extra_alias_counter); extra_alias_counter++; @@ -489,11 +487,11 @@ CreateWhereClause(vector> &conditions) { unique_ptr PGQMatchFunction::CreatePathFindingFunction( const string &prev_binding, const string &next_binding, - shared_ptr &edge_table, + const shared_ptr &edge_table, const string &path_finding_udf) { auto src_row_id = make_uniq("rowid", prev_binding); auto dst_row_id = make_uniq("rowid", next_binding); - auto csr_id = make_uniq(Value::INTEGER((int32_t)0)); + auto csr_id = make_uniq(Value::INTEGER(0)); vector> pathfinding_children; pathfinding_children.push_back(std::move(csr_id)); @@ -506,22 +504,21 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( std::move(pathfinding_children)); } -void UnnestSubpath(unique_ptr &subpath, +void UnnestSubpath(const unique_ptr &subpath, vector> &conditions, unique_ptr &from_clause) { - auto path_element = + const auto path_element = reinterpret_cast(subpath.get()); std::cout << path_element->path_variable; } -void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, +void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, unique_ptr &from_clause, vector> conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, - shared_ptr &edge_table, - SubPath* subpath) { - + const shared_ptr &edge_table, + const SubPath* subpath) { //! START //! 
FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b select_node->cte_map.map["cte1"] = CreateCSRCTE( @@ -596,9 +593,9 @@ void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, auto addition_function = make_uniq( "add", std::move(addition_children)); auto lower_limit = - make_uniq(Value::INTEGER(subpath->lower)); + make_uniq(Value::INTEGER(static_cast(subpath->lower))); auto upper_limit = - make_uniq(Value::INTEGER(subpath->upper)); + make_uniq(Value::INTEGER(static_cast(subpath->upper))); auto between_expression = make_uniq( std::move(addition_function), std::move(lower_limit), std::move(upper_limit)); @@ -613,7 +610,7 @@ void PGQMatchFunction::AddPathFinding(unique_ptr &select_node, unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, TableFunctionBindInput &) { auto duckpgq_state_entry = context.registered_state.find("duckpgq"); - auto duckpgq_state = (DuckPGQState *)duckpgq_state_entry->second.get(); + auto duckpgq_state = dynamic_cast(duckpgq_state_entry->second.get()); auto ref = dynamic_cast( duckpgq_state->transform_expression.get()); @@ -621,12 +618,10 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, auto data = make_uniq(); - vector> conditions; auto select_node = make_uniq(); unordered_map alias_map; - unordered_set named_subpaths; unique_ptr from_clause; int32_t extra_alias_counter = 0; @@ -794,7 +789,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, std::vector> final_column_list; for (auto &expression : ref->column_list) { - auto column_ref = dynamic_cast(expression.get()); + unordered_set named_subpaths; + auto column_ref = dynamic_cast(expression.get()); if (column_ref != nullptr) { if (named_subpaths.count(column_ref->column_names[0]) && column_ref->column_names.size() == 1) { From be255cec1a713185095622fc3e08e9cbc4f9d050 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 15:10:15 +0100 Subject: [PATCH 09/47] Added alias map --- 
.../functions/tablefunctions/match.hpp | 3 +- .../functions/tablefunctions/match.cpp | 57 ++++++++++--------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index a05d5339..7ea8ad31 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -115,6 +115,7 @@ struct PGQMatchFunction : public TableFunction { vector> conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, const shared_ptr &edge_table, - const SubPath* subpath); + const SubPath* subpath, + unordered_map &alias_map); }; } // namespace duckdb \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 50c928b8..63938bf1 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -518,50 +518,52 @@ void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, vector> conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, const shared_ptr &edge_table, - const SubPath* subpath) { + const SubPath* subpath, + unordered_map &alias_map) { //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x select_node->cte_map.map["cte1"] = CreateCSRCTE( edge_table, prev_binding, edge_binding, next_binding); - - //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x - auto temp_cte_select_subquery = CreateCountCTESubquery(); - - auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); + // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); //! 
src alias (FROM src a) - auto src_vertex_ref = make_uniq(); - src_vertex_ref->table_name = edge_table->source_reference; - src_vertex_ref->alias = prev_binding; + // auto src_vertex_ref = make_uniq(); + // src_vertex_ref->table_name = edge_table->source_reference; + // src_vertex_ref->alias = prev_binding; + alias_map[prev_binding] = edge_table->source_reference; + alias_map[next_binding] = edge_table->destination_reference; - cross_join_src_dst->left = std::move(src_vertex_ref); + // cross_join_src_dst->left = std::move(src_vertex_ref); //! dst alias (FROM dst b) - auto dst_vertex_ref = make_uniq(); - dst_vertex_ref->table_name = edge_table->destination_reference; - dst_vertex_ref->alias = next_binding; + // auto dst_vertex_ref = make_uniq(); + // dst_vertex_ref->table_name = edge_table->destination_reference; + // dst_vertex_ref->alias = next_binding; - cross_join_src_dst->right = std::move(dst_vertex_ref); + // cross_join_src_dst->right = std::move(dst_vertex_ref); - auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - cross_join_with_cte->left = std::move(temp_cte_select_subquery); - cross_join_with_cte->right = std::move(cross_join_src_dst); + //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x + // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); + // cross_join_with_cte->left = std::move(temp_cte_select_subquery); + // cross_join_with_cte->right = std::move(cross_join_src_dst); + + auto temp_cte_select_subquery = CreateCountCTESubquery(); + from_clause = std::move(temp_cte_select_subquery); if (from_clause) { // create a cross join since there is already something in the // from clause auto from_join = make_uniq(JoinRefType::CROSS); from_join->left = std::move(from_clause); - from_join->right = std::move(cross_join_with_cte); + from_join->right = std::move(temp_cte_select_subquery); from_clause = std::move(from_join); } else { - from_clause = std::move(cross_join_with_cte); + from_clause = std::move(temp_cte_select_subquery); } //! 
END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, - //! dst b + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x //! START //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) @@ -616,7 +618,7 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, duckpgq_state->transform_expression.get()); auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); - auto data = make_uniq(); + auto data = make_uniq(); vector> conditions; @@ -632,7 +634,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, PathElement *previous_vertex_element = GetPathElement(path_pattern->path_elements[0]); if (!previous_vertex_element) { - UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); + // todo(dtenwolde) this will be hit with MATCH o = . Or with a WHERE. + UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); // auto subpath_pattern_subquery = GenerateSubpathPatternSubquery( // path_pattern, pg_table, ref->column_list, named_subpaths); @@ -649,6 +652,7 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, // from_clause = std::move(subpath_pattern_subquery); // } } else { + // previous_vertex_element auto previous_vertex_table = FindGraphTable(previous_vertex_element->label, *pg_table); CheckInheritance(previous_vertex_table, previous_vertex_element, @@ -686,8 +690,6 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, } edge_element = GetPathElement(edge_subpath->path_list[0]); auto edge_table = FindGraphTable(edge_element->label, *pg_table); - - if (edge_subpath->lower == 1 && edge_subpath->upper == 1) { // No need to do path-finding } else { @@ -697,7 +699,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, edge_element->variable_binding, next_vertex_element->variable_binding, edge_table, - edge_subpath); + edge_subpath, + alias_map); } } else { // The edge element is a path element without WHERE or 
path-finding. From 22fc8d941c7355ef7bad030e6566a2c2baff97bc Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Fri, 12 Jan 2024 15:10:18 +0100 Subject: [PATCH 10/47] Added alias map --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index 7aafafca..e8e22a65 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit 7aafafcabc08298ea907c6059ad281d081dd0c6f +Subproject commit e8e22a65ecb3f7676c631badf52a948f6b807b38 From 14721d50a79854f558dc6fdca86e36c7789bda17 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:00:33 +0100 Subject: [PATCH 11/47] Now builds --- .../functions/tablefunctions/match.hpp | 17 +- .../functions/tablefunctions/match.cpp | 201 +++++++++--------- 2 files changed, 117 insertions(+), 101 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 7ea8ad31..bff1878c 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -112,10 +112,21 @@ struct PGQMatchFunction : public TableFunction { static void AddPathFinding(const unique_ptr &select_node, unique_ptr &from_clause, - vector> conditions, + vector> &conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, const shared_ptr &edge_table, - const SubPath* subpath, - unordered_map &alias_map); + const SubPath* subpath); + + static void AddEdgeJoins(const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter); }; } // namespace duckdb \ No newline at end of file diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp 
b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 63938bf1..c3d7e0ec 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -130,12 +130,12 @@ PathElement *PGQMatchFunction::GetPathElement( if (path_reference->path_reference_type == PGQPathReferenceType::PATH_ELEMENT) { return reinterpret_cast(path_reference.get()); - } else if (path_reference->path_reference_type == + } if (path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { return nullptr; - } else { - throw InternalException("Unknown path reference type detected"); - } + } + throw InternalException("Unknown path reference type detected"); + } unique_ptr @@ -253,7 +253,7 @@ PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, vector> csr_vertex_children; csr_vertex_children.push_back( - make_uniq(Value::INTEGER((int32_t)0))); + make_uniq(Value::INTEGER(0))); auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); @@ -504,22 +504,59 @@ unique_ptr PGQMatchFunction::CreatePathFindingFunction( std::move(pathfinding_children)); } -void UnnestSubpath(const unique_ptr &subpath, - vector> &conditions, - unique_ptr &from_clause) { - const auto path_element = - reinterpret_cast(subpath.get()); - std::cout << path_element->path_variable; - +// void UnnestSubpath(const unique_ptr &subpath, +// vector> &conditions, +// unique_ptr &from_clause) { +// const auto path_element = +// reinterpret_cast(subpath.get()); +// std::cout << path_element->path_variable; +// +// } + +void PGQMatchFunction::AddEdgeJoins(const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter) { + switch (edge_type) { + case 
PGQMatchType::MATCH_EDGE_ANY: { + select_node->modifiers.push_back(make_uniq()); + EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions); + break; + } + case PGQMatchType::MATCH_EDGE_LEFT: + EdgeTypeLeft(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, + edge_binding, prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_RIGHT: + EdgeTypeRight(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, + edge_binding, prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + EdgeTypeLeftRight(edge_table, edge_binding, + prev_binding, next_binding, conditions, + alias_map, extra_alias_counter); + break; + } + default: + throw InternalException("Unknown match type found"); + } } void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, unique_ptr &from_clause, - vector> conditions, + vector> &conditions, const string &prev_binding, const string &edge_binding, const string &next_binding, const shared_ptr &edge_table, - const SubPath* subpath, - unordered_map &alias_map) { + const SubPath* subpath) { //! START //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x select_node->cte_map.map["cte1"] = CreateCSRCTE( @@ -532,8 +569,7 @@ void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, // auto src_vertex_ref = make_uniq(); // src_vertex_ref->table_name = edge_table->source_reference; // src_vertex_ref->alias = prev_binding; - alias_map[prev_binding] = edge_table->source_reference; - alias_map[next_binding] = edge_table->destination_reference; + // cross_join_src_dst->left = std::move(src_vertex_ref); @@ -635,7 +671,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, GetPathElement(path_pattern->path_elements[0]); if (!previous_vertex_element) { // todo(dtenwolde) this will be hit with MATCH o = . Or with a WHERE. 
- UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); + throw NotImplementedException("WHERE in the first element is not supported yet. Nor is named subpaths"); + // UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); // auto subpath_pattern_subquery = GenerateSubpathPatternSubquery( // path_pattern, pg_table, ref->column_list, named_subpaths); @@ -676,8 +713,9 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { throw BinderException("Vertex and edge patterns must be alternated."); } - - + auto next_vertex_table = + FindGraphTable(next_vertex_element->label, *pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); PathElement *edge_element = GetPathElement(path_pattern->path_elements[idx_j]); if (!edge_element) { @@ -690,101 +728,68 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, } edge_element = GetPathElement(edge_subpath->path_list[0]); auto edge_table = FindGraphTable(edge_element->label, *pg_table); - if (edge_subpath->lower == 1 && edge_subpath->upper == 1) { - // No need to do path-finding - } else { - // Create CSR, add shortestpath to select, add iterativelength to where. 
+ alias_map[previous_vertex_element->variable_binding] = edge_table->source_reference; + alias_map[next_vertex_element->variable_binding] = edge_table->destination_reference; + if (edge_subpath->upper > 1) { + // Add the path-finding AddPathFinding(select_node, from_clause, conditions, previous_vertex_element->variable_binding, edge_element->variable_binding, next_vertex_element->variable_binding, - edge_table, - edge_subpath, - alias_map); + edge_table, edge_subpath); + } else { + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); } } else { // The edge element is a path element without WHERE or path-finding. + auto edge_table = FindGraphTable(edge_element->label, *pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + alias_map[next_vertex_element->variable_binding] = + next_vertex_table->table_name; + alias_map[edge_element->variable_binding] = edge_table->table_name; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR + // (b.dst = a.id AND b.src + // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) + } - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - CheckInheritance(edge_table, edge_element, conditions); - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, *pg_table); - CheckInheritance(next_vertex_table, 
next_vertex_element, conditions); - - - // check aliases - alias_map[next_vertex_element->variable_binding] = - next_vertex_table->table_name; - alias_map[edge_element->variable_binding] = edge_table->table_name; - -// if (!path_finding) { - switch (edge_element->match_type) { - case PGQMatchType::MATCH_EDGE_ANY: { - select_node->modifiers.push_back(make_uniq()); - EdgeTypeAny(edge_table, edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - } - case PGQMatchType::MATCH_EDGE_LEFT: - EdgeTypeLeft(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_RIGHT: - EdgeTypeRight(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - EdgeTypeLeftRight(edge_table, edge_element->variable_binding, - previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, - alias_map, extra_alias_counter); - break; - } - default: - throw InternalException("Unknown match type found"); - } -// } previous_vertex_element = next_vertex_element; previous_vertex_table = next_vertex_table; - // Check the edge type - // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id - // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id - // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR - // (b.dst = a.id AND b.src - // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND - // (b.dst = a.id AND b.src - //= c.id) + } } } -// if (!path_finding) { - // Go through all aliases encountered - for (auto &table_alias_entry : alias_map) { - auto table_ref = make_uniq(); - 
table_ref->table_name = table_alias_entry.second; - table_ref->alias = table_alias_entry.first; - - if (from_clause) { - auto new_root = make_uniq(JoinRefType::CROSS); - new_root->left = std::move(from_clause); - new_root->right = std::move(table_ref); - from_clause = std::move(new_root); - } else { - from_clause = std::move(table_ref); - } + // Go through all aliases encountered + for (auto &table_alias_entry : alias_map) { + auto table_ref = make_uniq(); + table_ref->table_name = table_alias_entry.second; + table_ref->alias = table_alias_entry.first; + + if (from_clause) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(from_clause); + new_root->right = std::move(table_ref); + from_clause = std::move(new_root); + } else { + from_clause = std::move(table_ref); } -// } - select_node->from_table = std::move(from_clause); + } + + select_node->from_table = std::move(from_clause); if (ref->where_clause) { conditions.push_back(std::move(ref->where_clause)); From eae111d2981ceeeab64e3c42176fc7f476637d7d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:01:06 +0100 Subject: [PATCH 12/47] Reverted avg.test changes --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index e8e22a65..cc7e2e69 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit e8e22a65ecb3f7676c631badf52a948f6b807b38 +Subproject commit cc7e2e6995606a06c856030a49504e68e9cbead9 From db958bc1a50d91cbac3d487475d93dc3cc560fcb Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:10:22 +0100 Subject: [PATCH 13/47] Fix wrong binding added to alias map --- .../duckpgq/functions/tablefunctions/match.hpp | 8 ++++---- .../duckpgq/functions/tablefunctions/match.cpp | 18 +++++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 
bff1878c..e54a76a4 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -90,10 +90,10 @@ struct PGQMatchFunction : public TableFunction { vector> &conditions, unordered_map &alias_map, int32_t &extra_alias_counter); -// static PathElement * -// HandleNestedSubPath(unique_ptr &path_reference, -// vector> &conditions, -// idx_t element_idx); + static PathElement * + HandleNestedSubPath(unique_ptr &path_reference, + vector> &conditions, + idx_t element_idx); static unique_ptr MatchBindReplace(ClientContext &context, TableFunctionBindInput &input); diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index c3d7e0ec..c404945c 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -463,12 +463,12 @@ void PGQMatchFunction::EdgeTypeLeftRight( conditions.push_back(std::move(combined_expr)); } -//PathElement *PGQMatchFunction::HandleNestedSubPath( -// unique_ptr &path_reference, -// vector> &conditions, idx_t element_idx) { -// auto subpath = reinterpret_cast(path_reference.get()); -// return GetPathElement(subpath->path_list[element_idx], conditions); -//} +PathElement *PGQMatchFunction::HandleNestedSubPath( + unique_ptr &path_reference, + vector> &conditions, idx_t element_idx) { + auto subpath = reinterpret_cast(path_reference.get()); + return GetPathElement(subpath->path_list[element_idx]); +} unique_ptr CreateWhereClause(vector> &conditions) { @@ -728,7 +728,7 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, } edge_element = GetPathElement(edge_subpath->path_list[0]); auto edge_table = FindGraphTable(edge_element->label, *pg_table); - alias_map[previous_vertex_element->variable_binding] = edge_table->source_reference; + alias_map[edge_element->variable_binding] = edge_table->source_reference; 
alias_map[next_vertex_element->variable_binding] = edge_table->destination_reference; if (edge_subpath->upper > 1) { // Add the path-finding @@ -762,13 +762,9 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND // (b.dst = a.id AND b.src //= c.id) - } - previous_vertex_element = next_vertex_element; previous_vertex_table = next_vertex_table; - - } } } From 3efe3fbd04c0fa22e00cefb0a8dfb2a67397cda8 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:13:03 +0100 Subject: [PATCH 14/47] Nvm, changed where alias is added --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index c404945c..41c6df3e 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -716,6 +716,8 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, auto next_vertex_table = FindGraphTable(next_vertex_element->label, *pg_table); CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; + PathElement *edge_element = GetPathElement(path_pattern->path_elements[idx_j]); if (!edge_element) { @@ -728,8 +730,6 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, } edge_element = GetPathElement(edge_subpath->path_list[0]); auto edge_table = FindGraphTable(edge_element->label, *pg_table); - alias_map[edge_element->variable_binding] = edge_table->source_reference; - alias_map[next_vertex_element->variable_binding] = edge_table->destination_reference; if (edge_subpath->upper > 1) { // Add the path-finding AddPathFinding(select_node, from_clause, conditions, @@ -738,6 +738,7 @@ unique_ptr 
PGQMatchFunction::MatchBindReplace(ClientContext &context, next_vertex_element->variable_binding, edge_table, edge_subpath); } else { + alias_map[edge_element->variable_binding] = edge_table->source_reference; AddEdgeJoins(select_node, edge_table, previous_vertex_table, next_vertex_table, edge_element->match_type, edge_element->variable_binding, previous_vertex_element->variable_binding, From add34507844c6194b0650b066bc379ae934f72fc Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:27:30 +0100 Subject: [PATCH 15/47] Simple complex query works --- .../functions/tablefunctions/match.cpp | 11 ++-- test/sql/path-finding/complex_matching.test | 56 +++++++++++++++++-- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 41c6df3e..475b7501 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -586,7 +586,7 @@ void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, // cross_join_with_cte->right = std::move(cross_join_src_dst); auto temp_cte_select_subquery = CreateCountCTESubquery(); - from_clause = std::move(temp_cte_select_subquery); + // from_clause = std::move(temp_cte_select_subquery); if (from_clause) { // create a cross join since there is already something in the @@ -610,7 +610,7 @@ void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, auto dst_row_id = make_uniq( "rowid", next_binding); auto csr_id = - make_uniq(Value::INTEGER((int32_t)0)); + make_uniq(Value::INTEGER(0)); vector> pathfinding_children; pathfinding_children.push_back(std::move(csr_id)); @@ -749,12 +749,11 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, auto edge_table = FindGraphTable(edge_element->label, *pg_table); CheckInheritance(edge_table, edge_element, conditions); // check aliases - 
alias_map[next_vertex_element->variable_binding] = - next_vertex_table->table_name; alias_map[edge_element->variable_binding] = edge_table->table_name; AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, - edge_element->variable_binding, previous_vertex_element->variable_binding, next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); + next_vertex_table, edge_element->match_type,edge_element->variable_binding, + previous_vertex_element->variable_binding, next_vertex_element->variable_binding, + conditions, alias_map, extra_alias_counter); // Check the edge type // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 6c023842..d9dadbcb 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -136,14 +136,60 @@ ORDER BY b_id, t_id [33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 [33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 -query II +query IIII -FROM GRAPH_TABLE (snb - MATCH (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) - COLUMNS (p.id as p_id, p2.id as p2_id, t.id) + MATCH (a:Person)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + WHERE a.id = 28587302322180 + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) ) tmp - limit 10; +ORDER BY b_id, t_id ---- - +Bryn 36 28587302322204 6 +Bryn 36 28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 196 +Bryn 39 
30786325577731 1031 +Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 query II -FROM GRAPH_TABLE (snb From 3f08d0da7ec282c688d94fc505ebfb4429b300ef Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 11:58:30 +0100 Subject: [PATCH 16/47] Working on where clause in first vertex pattern --- .../functions/tablefunctions/match.cpp | 167 ++++++++++-------- test/sql/path-finding/complex_matching.test | 55 ++++++ 2 files changed, 144 insertions(+), 78 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 475b7501..cf714dbf 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -671,8 +671,14 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, GetPathElement(path_pattern->path_elements[0]); if (!previous_vertex_element) { // todo(dtenwolde) this will be hit with MATCH o = . Or with a WHERE. - throw NotImplementedException("WHERE in the first element is not supported yet. 
Nor is named subpaths"); - // UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); + auto previous_vertex_subpath = reinterpret_cast(path_pattern->path_elements[0].get()); + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + if (previous_vertex_subpath->path_list.size() == 1) { + previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); + } else { + throw NotImplementedException("Named subpaths are not yet supported."); + } + // UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); // auto subpath_pattern_subquery = GenerateSubpathPatternSubquery( // path_pattern, pg_table, ref->column_list, named_subpaths); @@ -688,84 +694,89 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, // // there // from_clause = std::move(subpath_pattern_subquery); // } - } else { - // previous_vertex_element - auto previous_vertex_table = - FindGraphTable(previous_vertex_element->label, *pg_table); - CheckInheritance(previous_vertex_table, previous_vertex_element, - conditions); - alias_map[previous_vertex_element->variable_binding] = - previous_vertex_table->table_name; - - for (idx_t idx_j = 1; - idx_j < ref->path_patterns[idx_i]->path_elements.size(); - idx_j = idx_j + 2) { - - PathElement *next_vertex_element = - GetPathElement(path_pattern->path_elements[idx_j + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = - reinterpret_cast(path_pattern->path_elements[0].get()); - next_vertex_element = - GetPathElement(next_vertex_subpath->path_list[idx_j + 1]); - } - if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - throw BinderException("Vertex and edge patterns must be alternated."); - } - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, *pg_table); - CheckInheritance(next_vertex_table, next_vertex_element, conditions); - 
alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; - - PathElement *edge_element = - GetPathElement(path_pattern->path_elements[idx_j]); - if (!edge_element) { - // We are dealing with a subpath - auto edge_subpath = reinterpret_cast(path_pattern->path_elements[idx_j].get()); - conditions.push_back(std::move(edge_subpath->where_clause)); - if (edge_subpath->path_list.size() > 1) { - // todo(dtenwolde) deal with multiple elements in subpath - throw NotImplementedException("Subpath with multiple elements is not yet supported."); - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - if (edge_subpath->upper > 1) { - // Add the path-finding - AddPathFinding(select_node, from_clause, conditions, - previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding, - edge_table, edge_subpath); - } else { - alias_map[edge_element->variable_binding] = edge_table->source_reference; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, - edge_element->variable_binding, previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); + } + auto previous_vertex_table = + FindGraphTable(previous_vertex_element->label, *pg_table); + CheckInheritance(previous_vertex_table, previous_vertex_element, + conditions); + alias_map[previous_vertex_element->variable_binding] = + previous_vertex_table->table_name; + + for (idx_t idx_j = 1; + idx_j < ref->path_patterns[idx_i]->path_elements.size(); + idx_j = idx_j + 2) { + PathElement *next_vertex_element = + GetPathElement(path_pattern->path_elements[idx_j + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_pattern->path_elements[0].get()); + // Check the size of the subpath path list + // if size == 1: + // Path Element with 
a WHERE + // (){3} Repeated vertices are not supported + // Else: + // Unsure if this is possible to reach. Perhaps at some point with a nested pattern? + // Will be unsupported for now + + next_vertex_element = + GetPathElement(next_vertex_subpath->path_list[idx_j + 1]); + } + if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + throw BinderException("Vertex and edge patterns must be alternated."); } - } else { - // The edge element is a path element without WHERE or path-finding. - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - CheckInheritance(edge_table, edge_element, conditions); - // check aliases - alias_map[edge_element->variable_binding] = edge_table->table_name; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type,edge_element->variable_binding, - previous_vertex_element->variable_binding, next_vertex_element->variable_binding, - conditions, alias_map, extra_alias_counter); - // Check the edge type - // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id - // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id - // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR - // (b.dst = a.id AND b.src - // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND - // (b.dst = a.id AND b.src - //= c.id) - } - previous_vertex_element = next_vertex_element; - previous_vertex_table = next_vertex_table; + auto next_vertex_table = + FindGraphTable(next_vertex_element->label, *pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; + + PathElement *edge_element = + GetPathElement(path_pattern->path_elements[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + auto edge_subpath = reinterpret_cast(path_pattern->path_elements[idx_j].get()); + 
conditions.push_back(std::move(edge_subpath->where_clause)); + if (edge_subpath->path_list.size() > 1) { + // todo(dtenwolde) deal with multiple elements in subpath + throw NotImplementedException("Subpath with multiple elements is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, *pg_table); + if (edge_subpath->upper > 1) { + // Add the path-finding + AddPathFinding(select_node, from_clause, conditions, + previous_vertex_element->variable_binding, + edge_element->variable_binding, + next_vertex_element->variable_binding, + edge_table, edge_subpath); + } else { + alias_map[edge_element->variable_binding] = edge_table->source_reference; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); + } + } else { + // The edge element is a path element without WHERE or path-finding. 
+ auto edge_table = FindGraphTable(edge_element->label, *pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + alias_map[edge_element->variable_binding] = edge_table->table_name; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type,edge_element->variable_binding, + previous_vertex_element->variable_binding, next_vertex_element->variable_binding, + conditions, alias_map, extra_alias_counter); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR + // (b.dst = a.id AND b.src + // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) } + previous_vertex_element = next_vertex_element; + previous_vertex_table = next_vertex_table; } } diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index d9dadbcb..489abb35 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -191,6 +191,61 @@ Bryn 45 35184372088850 7142 Bryn 45 35184372088850 7689 Bryn 45 35184372088850 9929 +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +---- +Bryn 36 28587302322204 6 +Bryn 36 28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 196 +Bryn 39 30786325577731 1031 
+Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 + + query II -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) From 2c1dca993f3ab0d39593e7180bb003d364e3473f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 12:16:56 +0100 Subject: [PATCH 17/47] Adding check for where in subpath --- .../functions/tablefunctions/match.cpp | 14 ++- test/sql/path-finding/complex_matching.test | 108 +++++++++--------- 2 files changed, 64 insertions(+), 58 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index cf714dbf..dc30c122 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -672,7 +672,9 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, if (!previous_vertex_element) { // todo(dtenwolde) this will be hit with MATCH o = . Or with a WHERE. 
auto previous_vertex_subpath = reinterpret_cast(path_pattern->path_elements[0].get()); - conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + if (previous_vertex_subpath->where_clause) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } if (previous_vertex_subpath->path_list.size() == 1) { previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); } else { @@ -717,8 +719,10 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, // Else: // Unsure if this is possible to reach. Perhaps at some point with a nested pattern? // Will be unsupported for now - - next_vertex_element = + if (next_vertex_subpath->where_clause) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[idx_j + 1]); } if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || @@ -735,7 +739,9 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, if (!edge_element) { // We are dealing with a subpath auto edge_subpath = reinterpret_cast(path_pattern->path_elements[idx_j].get()); - conditions.push_back(std::move(edge_subpath->where_clause)); + if (edge_subpath->where_clause) { + conditions.push_back(std::move(edge_subpath->where_clause)); + } if (edge_subpath->path_list.size() > 1) { // todo(dtenwolde) deal with multiple elements in subpath throw NotImplementedException("Subpath with multiple elements is not yet supported."); diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 489abb35..aec47aa4 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -136,60 +136,60 @@ ORDER BY b_id, t_id [33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 [33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 -query IIII --FROM GRAPH_TABLE (snb - MATCH (a:Person)-[w:knows]-> 
{1,3}(b:Person)-[i:hasInterest]->(t:Tag) - WHERE a.id = 28587302322180 - COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) - ) tmp -ORDER BY b_id, t_id ----- -Bryn 36 28587302322204 6 -Bryn 36 28587302322204 588 -Bryn 36 28587302322204 1021 -Bryn 36 28587302322204 1767 -Bryn 36 28587302322204 1940 -Bryn 36 28587302322204 1995 -Bryn 36 28587302322204 2018 -Bryn 36 28587302322204 5174 -Bryn 36 28587302322204 6413 -Bryn 36 28587302322204 7328 -Bryn 36 28587302322204 9170 -Bryn 36 28587302322204 11695 -Bryn 36 28587302322204 12002 -Bryn 38 28587302322223 775 -Bryn 38 28587302322223 1938 -Bryn 39 30786325577731 196 -Bryn 39 30786325577731 1031 -Bryn 43 32985348833329 3 -Bryn 43 32985348833329 139 -Bryn 43 32985348833329 470 -Bryn 43 32985348833329 580 -Bryn 43 32985348833329 1985 -Bryn 43 32985348833329 2058 -Bryn 43 32985348833329 2777 -Bryn 43 32985348833329 2836 -Bryn 43 32985348833329 5114 -Bryn 45 35184372088850 804 -Bryn 45 35184372088850 973 -Bryn 45 35184372088850 1170 -Bryn 45 35184372088850 1185 -Bryn 45 35184372088850 1206 -Bryn 45 35184372088850 1749 -Bryn 45 35184372088850 1908 -Bryn 45 35184372088850 1954 -Bryn 45 35184372088850 2003 -Bryn 45 35184372088850 2786 -Bryn 45 35184372088850 2816 -Bryn 45 35184372088850 2969 -Bryn 45 35184372088850 2985 -Bryn 45 35184372088850 4865 -Bryn 45 35184372088850 6399 -Bryn 45 35184372088850 6815 -Bryn 45 35184372088850 7025 -Bryn 45 35184372088850 7142 -Bryn 45 35184372088850 7689 -Bryn 45 35184372088850 9929 +#query IIII +#-FROM GRAPH_TABLE (snb +# MATCH (a:Person)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) +# WHERE a.id = 28587302322180 +# COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) +# ) tmp +#ORDER BY b_id, t_id +#---- +#Bryn 36 28587302322204 6 +#Bryn 36 28587302322204 588 +#Bryn 36 28587302322204 1021 +#Bryn 36 28587302322204 1767 +#Bryn 36 28587302322204 1940 +#Bryn 36 28587302322204 1995 +#Bryn 36 28587302322204 2018 +#Bryn 36 
28587302322204 5174 +#Bryn 36 28587302322204 6413 +#Bryn 36 28587302322204 7328 +#Bryn 36 28587302322204 9170 +#Bryn 36 28587302322204 11695 +#Bryn 36 28587302322204 12002 +#Bryn 38 28587302322223 775 +#Bryn 38 28587302322223 1938 +#Bryn 39 30786325577731 196 +#Bryn 39 30786325577731 1031 +#Bryn 43 32985348833329 3 +#Bryn 43 32985348833329 139 +#Bryn 43 32985348833329 470 +#Bryn 43 32985348833329 580 +#Bryn 43 32985348833329 1985 +#Bryn 43 32985348833329 2058 +#Bryn 43 32985348833329 2777 +#Bryn 43 32985348833329 2836 +#Bryn 43 32985348833329 5114 +#Bryn 45 35184372088850 804 +#Bryn 45 35184372088850 973 +#Bryn 45 35184372088850 1170 +#Bryn 45 35184372088850 1185 +#Bryn 45 35184372088850 1206 +#Bryn 45 35184372088850 1749 +#Bryn 45 35184372088850 1908 +#Bryn 45 35184372088850 1954 +#Bryn 45 35184372088850 2003 +#Bryn 45 35184372088850 2786 +#Bryn 45 35184372088850 2816 +#Bryn 45 35184372088850 2969 +#Bryn 45 35184372088850 2985 +#Bryn 45 35184372088850 4865 +#Bryn 45 35184372088850 6399 +#Bryn 45 35184372088850 6815 +#Bryn 45 35184372088850 7025 +#Bryn 45 35184372088850 7142 +#Bryn 45 35184372088850 7689 +#Bryn 45 35184372088850 9929 query IIII -FROM GRAPH_TABLE (snb From 4dc6ae739d09b65e9caf143f9a63221f80a0be0b Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 12:27:25 +0100 Subject: [PATCH 18/47] Fixed wrong index --- .../functions/tablefunctions/match.cpp | 4 +- test/sql/path-finding/complex_matching.test | 278 +++++++++--------- 2 files changed, 144 insertions(+), 138 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index dc30c122..9f41d904 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -711,7 +711,7 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, GetPathElement(path_pattern->path_elements[idx_j + 1]); if (!next_vertex_element) { auto 
next_vertex_subpath = - reinterpret_cast(path_pattern->path_elements[0].get()); + reinterpret_cast(path_pattern->path_elements[idx_j + 1].get()); // Check the size of the subpath path list // if size == 1: // Path Element with a WHERE @@ -723,7 +723,7 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, conditions.push_back(std::move(next_vertex_subpath->where_clause)); } next_vertex_element = - GetPathElement(next_vertex_subpath->path_list[idx_j + 1]); + GetPathElement(next_vertex_subpath->path_list[0]); } if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index aec47aa4..02878cfa 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,93 +49,93 @@ EDGE TABLES ( LABEL replyOf ); -query IIIII -WITH CTE1 AS (SELECT CREATE_CSR_EDGE( - 0, - (SELECT count(a.id) FROM Person a), - CAST ( - (SELECT sum(CREATE_CSR_VERTEX( - 0, - (SELECT count(a.id) FROM Person a), - sub.dense_id, - sub.cnt) - ) - FROM ( - SELECT a.rowid as dense_id, count(k.person1id) as cnt - FROM Person a - LEFT JOIN Person_knows_Person k ON k.person1id = a.id - GROUP BY a.rowid) sub - ) - AS BIGINT), - a.rowid, - b.rowid, - k.rowid) as temp - FROM Person_knows_Person k - JOIN Person a on a.id = k.person1id - JOIN Person b on b.id = k.person2id) -SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, - a.firstname as a_name, - b.rowid as b_rowid, - b.id as b_id, - t.id as t_id -FROM Person a, - Person b, - person_hasInterest_Tag i, - Tag t, - (select count(cte1.temp) as temp from cte1) __x -WHERE a.id = 28587302322180 - and b.id = i.PersonId - and t.id = i.TagId - and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 -ORDER BY b_id, t_id ----- -[33, 77, 36] Bryn 36 
28587302322204 6 -[33, 77, 36] Bryn 36 28587302322204 588 -[33, 77, 36] Bryn 36 28587302322204 1021 -[33, 77, 36] Bryn 36 28587302322204 1767 -[33, 77, 36] Bryn 36 28587302322204 1940 -[33, 77, 36] Bryn 36 28587302322204 1995 -[33, 77, 36] Bryn 36 28587302322204 2018 -[33, 77, 36] Bryn 36 28587302322204 5174 -[33, 77, 36] Bryn 36 28587302322204 6413 -[33, 77, 36] Bryn 36 28587302322204 7328 -[33, 77, 36] Bryn 36 28587302322204 9170 -[33, 77, 36] Bryn 36 28587302322204 11695 -[33, 77, 36] Bryn 36 28587302322204 12002 -[33, 78, 38] Bryn 38 28587302322223 775 -[33, 78, 38] Bryn 38 28587302322223 1938 -[33, 79, 39] Bryn 39 30786325577731 196 -[33, 79, 39] Bryn 39 30786325577731 1031 -[33, 80, 43] Bryn 43 32985348833329 3 -[33, 80, 43] Bryn 43 32985348833329 139 -[33, 80, 43] Bryn 43 32985348833329 470 -[33, 80, 43] Bryn 43 32985348833329 580 -[33, 80, 43] Bryn 43 32985348833329 1985 -[33, 80, 43] Bryn 43 32985348833329 2058 -[33, 80, 43] Bryn 43 32985348833329 2777 -[33, 80, 43] Bryn 43 32985348833329 2836 -[33, 80, 43] Bryn 43 32985348833329 5114 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1749 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 -[33, 77, 36, 82, 45] Bryn 45 
35184372088850 7689 -[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 - +#query IIIII +#WITH CTE1 AS (SELECT CREATE_CSR_EDGE( +# 0, +# (SELECT count(a.id) FROM Person a), +# CAST ( +# (SELECT sum(CREATE_CSR_VERTEX( +# 0, +# (SELECT count(a.id) FROM Person a), +# sub.dense_id, +# sub.cnt) +# ) +# FROM ( +# SELECT a.rowid as dense_id, count(k.person1id) as cnt +# FROM Person a +# LEFT JOIN Person_knows_Person k ON k.person1id = a.id +# GROUP BY a.rowid) sub +# ) +# AS BIGINT), +# a.rowid, +# b.rowid, +# k.rowid) as temp +# FROM Person_knows_Person k +# JOIN Person a on a.id = k.person1id +# JOIN Person b on b.id = k.person2id) +#SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, +# a.firstname as a_name, +# b.rowid as b_rowid, +# b.id as b_id, +# t.id as t_id +#FROM Person a, +# Person b, +# person_hasInterest_Tag i, +# Tag t, +# (select count(cte1.temp) as temp from cte1) __x +#WHERE a.id = 28587302322180 +# and b.id = i.PersonId +# and t.id = i.TagId +# and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 +#ORDER BY b_id, t_id +#---- +#[33, 77, 36] Bryn 36 28587302322204 6 +#[33, 77, 36] Bryn 36 28587302322204 588 +#[33, 77, 36] Bryn 36 28587302322204 1021 +#[33, 77, 36] Bryn 36 28587302322204 1767 +#[33, 77, 36] Bryn 36 28587302322204 1940 +#[33, 77, 36] Bryn 36 28587302322204 1995 +#[33, 77, 36] Bryn 36 28587302322204 2018 +#[33, 77, 36] Bryn 36 28587302322204 5174 +#[33, 77, 36] Bryn 36 28587302322204 6413 +#[33, 77, 36] Bryn 36 28587302322204 7328 +#[33, 77, 36] Bryn 36 28587302322204 9170 +#[33, 77, 36] Bryn 36 28587302322204 11695 +#[33, 77, 36] Bryn 36 28587302322204 12002 +#[33, 78, 38] Bryn 38 28587302322223 775 +#[33, 78, 38] Bryn 38 28587302322223 1938 +#[33, 79, 39] Bryn 39 30786325577731 196 +#[33, 79, 39] Bryn 39 30786325577731 1031 +#[33, 80, 43] Bryn 43 32985348833329 3 +#[33, 80, 43] Bryn 43 32985348833329 139 +#[33, 80, 43] Bryn 43 32985348833329 470 +#[33, 80, 43] 
Bryn 43 32985348833329 580 +#[33, 80, 43] Bryn 43 32985348833329 1985 +#[33, 80, 43] Bryn 43 32985348833329 2058 +#[33, 80, 43] Bryn 43 32985348833329 2777 +#[33, 80, 43] Bryn 43 32985348833329 2836 +#[33, 80, 43] Bryn 43 32985348833329 5114 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1749 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 +#[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 +# #query IIII #-FROM GRAPH_TABLE (snb # MATCH (a:Person)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) @@ -190,60 +190,69 @@ ORDER BY b_id, t_id #Bryn 45 35184372088850 7142 #Bryn 45 35184372088850 7689 #Bryn 45 35184372088850 9929 +# +#query IIII +#-FROM GRAPH_TABLE (snb +# MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) +# COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) +# ) tmp +#ORDER BY b_id, t_id +#---- +#Bryn 36 28587302322204 6 +#Bryn 36 28587302322204 588 +#Bryn 36 28587302322204 1021 +#Bryn 36 28587302322204 1767 +#Bryn 36 28587302322204 1940 +#Bryn 36 28587302322204 1995 +#Bryn 36 28587302322204 2018 +#Bryn 36 28587302322204 5174 
+#Bryn 36 28587302322204 6413 +#Bryn 36 28587302322204 7328 +#Bryn 36 28587302322204 9170 +#Bryn 36 28587302322204 11695 +#Bryn 36 28587302322204 12002 +#Bryn 38 28587302322223 775 +#Bryn 38 28587302322223 1938 +#Bryn 39 30786325577731 196 +#Bryn 39 30786325577731 1031 +#Bryn 43 32985348833329 3 +#Bryn 43 32985348833329 139 +#Bryn 43 32985348833329 470 +#Bryn 43 32985348833329 580 +#Bryn 43 32985348833329 1985 +#Bryn 43 32985348833329 2058 +#Bryn 43 32985348833329 2777 +#Bryn 43 32985348833329 2836 +#Bryn 43 32985348833329 5114 +#Bryn 45 35184372088850 804 +#Bryn 45 35184372088850 973 +#Bryn 45 35184372088850 1170 +#Bryn 45 35184372088850 1185 +#Bryn 45 35184372088850 1206 +#Bryn 45 35184372088850 1749 +#Bryn 45 35184372088850 1908 +#Bryn 45 35184372088850 1954 +#Bryn 45 35184372088850 2003 +#Bryn 45 35184372088850 2786 +#Bryn 45 35184372088850 2816 +#Bryn 45 35184372088850 2969 +#Bryn 45 35184372088850 2985 +#Bryn 45 35184372088850 4865 +#Bryn 45 35184372088850 6399 +#Bryn 45 35184372088850 6815 +#Bryn 45 35184372088850 7025 +#Bryn 45 35184372088850 7142 +#Bryn 45 35184372088850 7689 +#Bryn 45 35184372088850 9929 query IIII -FROM GRAPH_TABLE (snb - MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag WHERE t.id = 6) COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) ) tmp ORDER BY b_id, t_id ---- Bryn 36 28587302322204 6 -Bryn 36 28587302322204 588 -Bryn 36 28587302322204 1021 -Bryn 36 28587302322204 1767 -Bryn 36 28587302322204 1940 -Bryn 36 28587302322204 1995 -Bryn 36 28587302322204 2018 -Bryn 36 28587302322204 5174 -Bryn 36 28587302322204 6413 -Bryn 36 28587302322204 7328 -Bryn 36 28587302322204 9170 -Bryn 36 28587302322204 11695 -Bryn 36 28587302322204 12002 -Bryn 38 28587302322223 775 -Bryn 38 28587302322223 1938 -Bryn 39 30786325577731 196 -Bryn 39 30786325577731 1031 -Bryn 43 
32985348833329 3 -Bryn 43 32985348833329 139 -Bryn 43 32985348833329 470 -Bryn 43 32985348833329 580 -Bryn 43 32985348833329 1985 -Bryn 43 32985348833329 2058 -Bryn 43 32985348833329 2777 -Bryn 43 32985348833329 2836 -Bryn 43 32985348833329 5114 -Bryn 45 35184372088850 804 -Bryn 45 35184372088850 973 -Bryn 45 35184372088850 1170 -Bryn 45 35184372088850 1185 -Bryn 45 35184372088850 1206 -Bryn 45 35184372088850 1749 -Bryn 45 35184372088850 1908 -Bryn 45 35184372088850 1954 -Bryn 45 35184372088850 2003 -Bryn 45 35184372088850 2786 -Bryn 45 35184372088850 2816 -Bryn 45 35184372088850 2969 -Bryn 45 35184372088850 2985 -Bryn 45 35184372088850 4865 -Bryn 45 35184372088850 6399 -Bryn 45 35184372088850 6815 -Bryn 45 35184372088850 7025 -Bryn 45 35184372088850 7142 -Bryn 45 35184372088850 7689 -Bryn 45 35184372088850 9929 query II @@ -254,9 +263,6 @@ query II limit 10; ---- - - - statement ok -FROM GRAPH_TABLE (snb MATCH p = ANY SHORTEST (a:Person where a.id = 28587302322180)-[k:knows]->{1,3}(b:Person) From 49c0eebca2c5a4546854dd895ab0629ef550ea51 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 16:11:20 +0100 Subject: [PATCH 19/47] Adding check for subpath of next_vertex --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 4 ++++ test/sql/path-finding/complex_matching.test | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 9f41d904..0190fe7b 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -712,6 +712,10 @@ unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, if (!next_vertex_element) { auto next_vertex_subpath = reinterpret_cast(path_pattern->path_elements[idx_j + 1].get()); + if (next_vertex_subpath->path_list.size() > 1) { + throw NotImplementedException("Recursive patterns are not yet supported."); + } + // Check the size of 
the subpath path list // if size == 1: // Path Element with a WHERE diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 02878cfa..6a44671f 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -254,6 +254,13 @@ ORDER BY b_id, t_id ---- Bryn 36 28587302322204 6 +statement error +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180){3}) + COLUMNS (a.firstname as a_name) + ) tmp +---- +Parser Error: syntax error at or near "{" query II -FROM GRAPH_TABLE (snb From 6e07919746b3d7d66e44e245cab2004e8c8fca48 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Mon, 15 Jan 2024 16:13:25 +0100 Subject: [PATCH 20/47] Re-enable tests --- test/sql/path-finding/complex_matching.test | 390 ++++++++++---------- 1 file changed, 195 insertions(+), 195 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 6a44671f..aa21e9f2 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,201 +49,201 @@ EDGE TABLES ( LABEL replyOf ); -#query IIIII -#WITH CTE1 AS (SELECT CREATE_CSR_EDGE( -# 0, -# (SELECT count(a.id) FROM Person a), -# CAST ( -# (SELECT sum(CREATE_CSR_VERTEX( -# 0, -# (SELECT count(a.id) FROM Person a), -# sub.dense_id, -# sub.cnt) -# ) -# FROM ( -# SELECT a.rowid as dense_id, count(k.person1id) as cnt -# FROM Person a -# LEFT JOIN Person_knows_Person k ON k.person1id = a.id -# GROUP BY a.rowid) sub -# ) -# AS BIGINT), -# a.rowid, -# b.rowid, -# k.rowid) as temp -# FROM Person_knows_Person k -# JOIN Person a on a.id = k.person1id -# JOIN Person b on b.id = k.person2id) -#SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, -# a.firstname as a_name, -# b.rowid as b_rowid, -# b.id as b_id, -# t.id as t_id -#FROM Person a, -# Person b, -# person_hasInterest_Tag i, -# Tag t, -# (select count(cte1.temp) as temp 
from cte1) __x -#WHERE a.id = 28587302322180 -# and b.id = i.PersonId -# and t.id = i.TagId -# and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 -#ORDER BY b_id, t_id -#---- -#[33, 77, 36] Bryn 36 28587302322204 6 -#[33, 77, 36] Bryn 36 28587302322204 588 -#[33, 77, 36] Bryn 36 28587302322204 1021 -#[33, 77, 36] Bryn 36 28587302322204 1767 -#[33, 77, 36] Bryn 36 28587302322204 1940 -#[33, 77, 36] Bryn 36 28587302322204 1995 -#[33, 77, 36] Bryn 36 28587302322204 2018 -#[33, 77, 36] Bryn 36 28587302322204 5174 -#[33, 77, 36] Bryn 36 28587302322204 6413 -#[33, 77, 36] Bryn 36 28587302322204 7328 -#[33, 77, 36] Bryn 36 28587302322204 9170 -#[33, 77, 36] Bryn 36 28587302322204 11695 -#[33, 77, 36] Bryn 36 28587302322204 12002 -#[33, 78, 38] Bryn 38 28587302322223 775 -#[33, 78, 38] Bryn 38 28587302322223 1938 -#[33, 79, 39] Bryn 39 30786325577731 196 -#[33, 79, 39] Bryn 39 30786325577731 1031 -#[33, 80, 43] Bryn 43 32985348833329 3 -#[33, 80, 43] Bryn 43 32985348833329 139 -#[33, 80, 43] Bryn 43 32985348833329 470 -#[33, 80, 43] Bryn 43 32985348833329 580 -#[33, 80, 43] Bryn 43 32985348833329 1985 -#[33, 80, 43] Bryn 43 32985348833329 2058 -#[33, 80, 43] Bryn 43 32985348833329 2777 -#[33, 80, 43] Bryn 43 32985348833329 2836 -#[33, 80, 43] Bryn 43 32985348833329 5114 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1749 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 
-#[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 -#[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 -# -#query IIII -#-FROM GRAPH_TABLE (snb -# MATCH (a:Person)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) -# WHERE a.id = 28587302322180 -# COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) -# ) tmp -#ORDER BY b_id, t_id -#---- -#Bryn 36 28587302322204 6 -#Bryn 36 28587302322204 588 -#Bryn 36 28587302322204 1021 -#Bryn 36 28587302322204 1767 -#Bryn 36 28587302322204 1940 -#Bryn 36 28587302322204 1995 -#Bryn 36 28587302322204 2018 -#Bryn 36 28587302322204 5174 -#Bryn 36 28587302322204 6413 -#Bryn 36 28587302322204 7328 -#Bryn 36 28587302322204 9170 -#Bryn 36 28587302322204 11695 -#Bryn 36 28587302322204 12002 -#Bryn 38 28587302322223 775 -#Bryn 38 28587302322223 1938 -#Bryn 39 30786325577731 196 -#Bryn 39 30786325577731 1031 -#Bryn 43 32985348833329 3 -#Bryn 43 32985348833329 139 -#Bryn 43 32985348833329 470 -#Bryn 43 32985348833329 580 -#Bryn 43 32985348833329 1985 -#Bryn 43 32985348833329 2058 -#Bryn 43 32985348833329 2777 -#Bryn 43 32985348833329 2836 -#Bryn 43 32985348833329 5114 -#Bryn 45 35184372088850 804 -#Bryn 45 35184372088850 973 -#Bryn 45 35184372088850 1170 -#Bryn 45 35184372088850 1185 -#Bryn 45 35184372088850 1206 -#Bryn 45 35184372088850 1749 -#Bryn 45 35184372088850 1908 -#Bryn 45 35184372088850 1954 -#Bryn 45 35184372088850 2003 -#Bryn 45 35184372088850 2786 -#Bryn 45 35184372088850 2816 -#Bryn 45 35184372088850 2969 -#Bryn 45 35184372088850 2985 -#Bryn 45 35184372088850 4865 -#Bryn 45 35184372088850 6399 -#Bryn 45 35184372088850 6815 -#Bryn 45 35184372088850 7025 -#Bryn 45 35184372088850 7142 -#Bryn 45 35184372088850 7689 -#Bryn 45 35184372088850 9929 -# 
-#query IIII -#-FROM GRAPH_TABLE (snb -# MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) -# COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) -# ) tmp -#ORDER BY b_id, t_id -#---- -#Bryn 36 28587302322204 6 -#Bryn 36 28587302322204 588 -#Bryn 36 28587302322204 1021 -#Bryn 36 28587302322204 1767 -#Bryn 36 28587302322204 1940 -#Bryn 36 28587302322204 1995 -#Bryn 36 28587302322204 2018 -#Bryn 36 28587302322204 5174 -#Bryn 36 28587302322204 6413 -#Bryn 36 28587302322204 7328 -#Bryn 36 28587302322204 9170 -#Bryn 36 28587302322204 11695 -#Bryn 36 28587302322204 12002 -#Bryn 38 28587302322223 775 -#Bryn 38 28587302322223 1938 -#Bryn 39 30786325577731 196 -#Bryn 39 30786325577731 1031 -#Bryn 43 32985348833329 3 -#Bryn 43 32985348833329 139 -#Bryn 43 32985348833329 470 -#Bryn 43 32985348833329 580 -#Bryn 43 32985348833329 1985 -#Bryn 43 32985348833329 2058 -#Bryn 43 32985348833329 2777 -#Bryn 43 32985348833329 2836 -#Bryn 43 32985348833329 5114 -#Bryn 45 35184372088850 804 -#Bryn 45 35184372088850 973 -#Bryn 45 35184372088850 1170 -#Bryn 45 35184372088850 1185 -#Bryn 45 35184372088850 1206 -#Bryn 45 35184372088850 1749 -#Bryn 45 35184372088850 1908 -#Bryn 45 35184372088850 1954 -#Bryn 45 35184372088850 2003 -#Bryn 45 35184372088850 2786 -#Bryn 45 35184372088850 2816 -#Bryn 45 35184372088850 2969 -#Bryn 45 35184372088850 2985 -#Bryn 45 35184372088850 4865 -#Bryn 45 35184372088850 6399 -#Bryn 45 35184372088850 6815 -#Bryn 45 35184372088850 7025 -#Bryn 45 35184372088850 7142 -#Bryn 45 35184372088850 7689 -#Bryn 45 35184372088850 9929 +query IIIII +WITH CTE1 AS (SELECT CREATE_CSR_EDGE( + 0, + (SELECT count(a.id) FROM Person a), + CAST ( + (SELECT sum(CREATE_CSR_VERTEX( + 0, + (SELECT count(a.id) FROM Person a), + sub.dense_id, + sub.cnt) + ) + FROM ( + SELECT a.rowid as dense_id, count(k.person1id) as cnt + FROM Person a + LEFT JOIN Person_knows_Person k ON k.person1id = a.id + GROUP BY a.rowid) sub 
+ ) + AS BIGINT), + a.rowid, + b.rowid, + k.rowid) as temp + FROM Person_knows_Person k + JOIN Person a on a.id = k.person1id + JOIN Person b on b.id = k.person2id) +SELECT shortestpath(0, (select count(*) from Person), a.rowid, b.rowid) as path, + a.firstname as a_name, + b.rowid as b_rowid, + b.id as b_id, + t.id as t_id +FROM Person a, + Person b, + person_hasInterest_Tag i, + Tag t, + (select count(cte1.temp) as temp from cte1) __x +WHERE a.id = 28587302322180 + and b.id = i.PersonId + and t.id = i.TagId + and __x.temp * 0 + iterativelength(0, (select count(*) from Person), a.rowid, b.rowid) between 1 and 3 +ORDER BY b_id, t_id +---- +[33, 77, 36] Bryn 36 28587302322204 6 +[33, 77, 36] Bryn 36 28587302322204 588 +[33, 77, 36] Bryn 36 28587302322204 1021 +[33, 77, 36] Bryn 36 28587302322204 1767 +[33, 77, 36] Bryn 36 28587302322204 1940 +[33, 77, 36] Bryn 36 28587302322204 1995 +[33, 77, 36] Bryn 36 28587302322204 2018 +[33, 77, 36] Bryn 36 28587302322204 5174 +[33, 77, 36] Bryn 36 28587302322204 6413 +[33, 77, 36] Bryn 36 28587302322204 7328 +[33, 77, 36] Bryn 36 28587302322204 9170 +[33, 77, 36] Bryn 36 28587302322204 11695 +[33, 77, 36] Bryn 36 28587302322204 12002 +[33, 78, 38] Bryn 38 28587302322223 775 +[33, 78, 38] Bryn 38 28587302322223 1938 +[33, 79, 39] Bryn 39 30786325577731 196 +[33, 79, 39] Bryn 39 30786325577731 1031 +[33, 80, 43] Bryn 43 32985348833329 3 +[33, 80, 43] Bryn 43 32985348833329 139 +[33, 80, 43] Bryn 43 32985348833329 470 +[33, 80, 43] Bryn 43 32985348833329 580 +[33, 80, 43] Bryn 43 32985348833329 1985 +[33, 80, 43] Bryn 43 32985348833329 2058 +[33, 80, 43] Bryn 43 32985348833329 2777 +[33, 80, 43] Bryn 43 32985348833329 2836 +[33, 80, 43] Bryn 43 32985348833329 5114 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 804 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 973 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1170 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1185 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1206 +[33, 77, 36, 82, 45] 
Bryn 45 35184372088850 1749 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1908 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 1954 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2003 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2786 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2816 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2969 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 2985 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 4865 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6399 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 6815 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7025 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7142 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 7689 +[33, 77, 36, 82, 45] Bryn 45 35184372088850 9929 + +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + WHERE a.id = 28587302322180 + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +---- +Bryn 36 28587302322204 6 +Bryn 36 28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 196 +Bryn 39 30786325577731 1031 +Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 
35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 + +query IIII +-FROM GRAPH_TABLE (snb + MATCH (a:Person WHERE a.id = 28587302322180)-[w:knows]-> {1,3}(b:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (a.firstname as p_name, b.rowid as b_rowid, b.id as b_id, t.id as t_id) + ) tmp +ORDER BY b_id, t_id +---- +Bryn 36 28587302322204 6 +Bryn 36 28587302322204 588 +Bryn 36 28587302322204 1021 +Bryn 36 28587302322204 1767 +Bryn 36 28587302322204 1940 +Bryn 36 28587302322204 1995 +Bryn 36 28587302322204 2018 +Bryn 36 28587302322204 5174 +Bryn 36 28587302322204 6413 +Bryn 36 28587302322204 7328 +Bryn 36 28587302322204 9170 +Bryn 36 28587302322204 11695 +Bryn 36 28587302322204 12002 +Bryn 38 28587302322223 775 +Bryn 38 28587302322223 1938 +Bryn 39 30786325577731 196 +Bryn 39 30786325577731 1031 +Bryn 43 32985348833329 3 +Bryn 43 32985348833329 139 +Bryn 43 32985348833329 470 +Bryn 43 32985348833329 580 +Bryn 43 32985348833329 1985 +Bryn 43 32985348833329 2058 +Bryn 43 32985348833329 2777 +Bryn 43 32985348833329 2836 +Bryn 43 32985348833329 5114 +Bryn 45 35184372088850 804 +Bryn 45 35184372088850 973 +Bryn 45 35184372088850 1170 +Bryn 45 35184372088850 1185 +Bryn 45 35184372088850 1206 +Bryn 45 35184372088850 1749 +Bryn 45 35184372088850 1908 +Bryn 45 35184372088850 1954 +Bryn 45 35184372088850 2003 +Bryn 45 35184372088850 2786 +Bryn 45 35184372088850 2816 +Bryn 45 35184372088850 2969 +Bryn 45 35184372088850 2985 +Bryn 45 35184372088850 4865 +Bryn 45 35184372088850 6399 +Bryn 45 35184372088850 6815 +Bryn 45 35184372088850 7025 +Bryn 45 35184372088850 7142 +Bryn 45 35184372088850 7689 +Bryn 45 35184372088850 9929 query IIII -FROM GRAPH_TABLE (snb From 59cd37a21379313ba0b5d0cda01ad4db30633703 Mon Sep 17 00:00:00 2001 From: 
dtenwolde Date: Wed, 17 Jan 2024 11:35:51 +0100 Subject: [PATCH 21/47] Started on returning the path in a named subpath --- .../functions/tablefunctions/match.hpp | 236 +- .../functions/tablefunctions/match.cpp | 2130 +++++++++-------- duckpgq/src/duckpgq_extension.cpp | 7 +- test/sql/path-finding/complex_matching.test | 35 +- 4 files changed, 1247 insertions(+), 1161 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index e54a76a4..36fac4a0 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -14,119 +14,125 @@ #include "duckdb/parser/path_pattern.hpp" namespace duckdb { - -struct PGQMatchFunction : public TableFunction { -public: + struct PGQMatchFunction : public TableFunction { + public: PGQMatchFunction() { - name = "duckpgq_match"; - bind_replace = MatchBindReplace; - } - struct MatchBindData : public TableFunctionData { - bool done = false; - }; - - - - static shared_ptr - FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); - static void - CheckInheritance(const shared_ptr &tableref, - PathElement *element, - vector> &conditions); - - static void - CheckEdgeTableConstraints(const string &src_reference, - const string &dst_reference, - const shared_ptr &edge_table); - - static unique_ptr CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, - const string &vertex_alias, const string &edge_alias); - - static PathElement * - GetPathElement(const unique_ptr &path_reference); - - static unique_ptr - GetCountTable(const shared_ptr &edge_table, - const string &prev_binding); - - static unique_ptr - GetJoinRef(const shared_ptr &edge_table, - const string &edge_binding, const string &prev_binding, - const string &next_binding); - - static unique_ptr CreateCountCTESubquery(); - - static unique_ptr - CreateCSRCTE(const shared_ptr &edge_table, - const string 
&edge_binding, const string &prev_binding, - const string &next_binding); - - static void EdgeTypeAny(const shared_ptr &edge_table, - const string &edge_binding, - const string &prev_binding, - const string &next_binding, - vector> &conditions); - - static void EdgeTypeLeft(const shared_ptr &edge_table, - const string &next_table_name, - const string &prev_table_name, - const string &edge_binding, - const string &prev_binding, - const string &next_binding, - vector> &conditions); - - static void EdgeTypeRight(const shared_ptr &edge_table, - const string &next_table_name, - const string &prev_table_name, - const string &edge_binding, - const string &prev_binding, - const string &next_binding, - vector> &conditions); - - static void EdgeTypeLeftRight( - const shared_ptr &edge_table, const string &edge_binding, - const string &prev_binding, const string &next_binding, - vector> &conditions, - unordered_map &alias_map, int32_t &extra_alias_counter); - - static PathElement * - HandleNestedSubPath(unique_ptr &path_reference, - vector> &conditions, - idx_t element_idx); - - static unique_ptr MatchBindReplace(ClientContext &context, - TableFunctionBindInput &input); - - static unique_ptr GenerateSubpathPatternSubquery( - unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, - vector> &column_list, - unordered_set &named_subpaths); - - static unique_ptr - CreatePathFindingFunction(const string &prev_binding, - const string &next_binding, - const shared_ptr &edge_table, - const string &path_finding_udf); - - - static void AddPathFinding(const unique_ptr &select_node, - unique_ptr &from_clause, - vector> &conditions, - const string &prev_binding, const string &edge_binding, const string &next_binding, - const shared_ptr &edge_table, - const SubPath* subpath); - - static void AddEdgeJoins(const unique_ptr &select_node, - const shared_ptr &edge_table, - const shared_ptr &previous_vertex_table, - const shared_ptr &next_vertex_table, - PGQMatchType edge_type, - const 
string &edge_binding, - const string &prev_binding, - const string &next_binding, - vector> &conditions, - unordered_map &alias_map, - int32_t &extra_alias_counter); -}; -} // namespace duckdb \ No newline at end of file + name = "duckpgq_match"; + bind_replace = MatchBindReplace; + } + + struct MatchBindData : public TableFunctionData { + bool done = false; + }; + + + static shared_ptr + FindGraphTable(const string& label, CreatePropertyGraphInfo& pg_table); + + static void + CheckInheritance(const shared_ptr& tableref, + PathElement* element, + vector>& conditions); + + static void + CheckEdgeTableConstraints(const string& src_reference, + const string& dst_reference, + const shared_ptr& edge_table); + + static unique_ptr CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string& vertex_alias, const string& edge_alias); + + static PathElement* + GetPathElement(const unique_ptr& path_reference); + + static unique_ptr + GetCountTable(const shared_ptr& edge_table, + const string& prev_binding); + + static unique_ptr + GetJoinRef(const shared_ptr& edge_table, + const string& edge_binding, const string& prev_binding, + const string& next_binding); + + static unique_ptr CreateCountCTESubquery(); + + static unique_ptr + CreateCSRCTE(const shared_ptr& edge_table, + const string& edge_binding, const string& prev_binding, + const string& next_binding); + + static void EdgeTypeAny(const shared_ptr& edge_table, + const string& edge_binding, + const string& prev_binding, + const string& next_binding, + vector>& conditions); + + static void EdgeTypeLeft(const shared_ptr& edge_table, + const string& next_table_name, + const string& prev_table_name, + const string& edge_binding, + const string& prev_binding, + const string& next_binding, + vector>& conditions); + + static void EdgeTypeRight(const shared_ptr& edge_table, + const string& next_table_name, + const string& prev_table_name, + const string& edge_binding, + const string& prev_binding, + const 
string& next_binding, + vector>& conditions); + + static void EdgeTypeLeftRight( + const shared_ptr& edge_table, const string& edge_binding, + const string& prev_binding, const string& next_binding, + vector>& conditions, + unordered_map& alias_map, int32_t& extra_alias_counter); + + static PathElement* + HandleNestedSubPath(unique_ptr& path_reference, + vector>& conditions, + idx_t element_idx); + + static unique_ptr MatchBindReplace(ClientContext& context, + TableFunctionBindInput& input); + + static unique_ptr GenerateSubpathPatternSubquery( + unique_ptr& path_pattern, CreatePropertyGraphInfo* pg_table, + vector>& column_list, + unordered_set& named_subpaths); + + static unique_ptr + CreatePathFindingFunction(vector> &path_list); + + + static void AddPathFinding(const unique_ptr& select_node, + unique_ptr& from_clause, + vector>& conditions, + const string& prev_binding, const string& edge_binding, const string& next_binding, + const shared_ptr& edge_table, + const SubPath* subpath); + + static void AddEdgeJoins(const unique_ptr& select_node, + const shared_ptr& edge_table, + const shared_ptr& previous_vertex_table, + const shared_ptr& next_vertex_table, + PGQMatchType edge_type, + const string& edge_binding, + const string& prev_binding, + const string& next_binding, + vector>& conditions, + unordered_map& alias_map, + int32_t& extra_alias_counter); + + static void ProcessPathList(vector>& path_pattern, + vector>& conditions, + unique_ptr& from_clause, unique_ptr& select_node, + unordered_map& alias_map, + CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, + vector>& column_list); + + static bool CheckNamedSubpath(string subpath_name, vector>& column_list); + }; +} // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 0190fe7b..81d8afe9 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ 
b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -26,1060 +26,1120 @@ namespace duckdb { + shared_ptr + PGQMatchFunction::FindGraphTable(const string& label, + CreatePropertyGraphInfo& pg_table) { + const auto graph_table_entry = pg_table.label_map.find(label); + if (graph_table_entry == pg_table.label_map.end()) { + throw BinderException("The label %s is not registered in property graph %s", + label, pg_table.property_graph_name); + } + + return graph_table_entry->second; + } + + void PGQMatchFunction::CheckInheritance( + const shared_ptr& tableref, PathElement* element, + vector>& conditions) { + if (tableref->main_label == element->label) { + return; + } + auto constant_expression_two = + make_uniq(Value::INTEGER((int32_t) 2)); + const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), + element->label); + + const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); + auto constant_expression_idx_label = + make_uniq(Value::INTEGER(static_cast(idx_of_element))); + + vector> power_of_children; + power_of_children.push_back(std::move(constant_expression_two)); + power_of_children.push_back(std::move(constant_expression_idx_label)); + auto power_of_term = + make_uniq("power", std::move(power_of_children)); + auto bigint_cast = + make_uniq(LogicalType::BIGINT, std::move(power_of_term)); + auto subcategory_colref = make_uniq( + tableref->discriminator, element->variable_binding); + vector> and_children; + and_children.push_back(std::move(subcategory_colref)); + and_children.push_back(std::move(bigint_cast)); + + auto and_expression = + make_uniq("&", std::move(and_children)); + + auto constant_expression_idx_label_comparison = make_uniq( + Value::INTEGER(static_cast(idx_of_element + 1))); + + auto subset_compare = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(and_expression), + std::move(constant_expression_idx_label_comparison)); + conditions.push_back(std::move(subset_compare)); + } + + void 
PGQMatchFunction::CheckEdgeTableConstraints( + const string& src_reference, const string& dst_reference, + const shared_ptr& edge_table) { + if (src_reference != edge_table->source_reference) { + throw BinderException("Label %s is not registered as a source reference " + "for edge pattern of table %s", + src_reference, edge_table->table_name); + } + if (dst_reference != edge_table->destination_reference) { + throw BinderException("Label %s is not registered as a destination " + "reference for edge pattern of table %s", + src_reference, edge_table->table_name); + } + } + + unique_ptr PGQMatchFunction::CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string& vertex_alias, const string& edge_alias) { + vector> conditions; + + if (vertex_keys.size() != edge_keys.size()) { + throw BinderException("Vertex columns and edge columns size mismatch"); + } + for (idx_t i = 0; i < vertex_keys.size(); i++) { + auto vertex_colref = + make_uniq(vertex_keys[i], vertex_alias); + auto edge_colref = make_uniq(edge_keys[i], edge_alias); + conditions.push_back(make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), + std::move(edge_colref))); + } + unique_ptr where_clause; + + for (auto& condition: conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } -shared_ptr -PGQMatchFunction::FindGraphTable(const string &label, - CreatePropertyGraphInfo &pg_table) { - const auto graph_table_entry = pg_table.label_map.find(label); - if (graph_table_entry == pg_table.label_map.end()) { - throw BinderException("The label %s is not registered in property graph %s", - label, pg_table.property_graph_name); - } - - return graph_table_entry->second; -} - -void PGQMatchFunction::CheckInheritance( - const shared_ptr &tableref, PathElement *element, - vector> &conditions) { - if (tableref->main_label == element->label) 
{ - return; - } - auto constant_expression_two = - make_uniq(Value::INTEGER((int32_t)2)); - const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), - element->label); - - const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); - auto constant_expression_idx_label = - make_uniq(Value::INTEGER(static_cast(idx_of_element))); - - vector> power_of_children; - power_of_children.push_back(std::move(constant_expression_two)); - power_of_children.push_back(std::move(constant_expression_idx_label)); - auto power_of_term = - make_uniq("power", std::move(power_of_children)); - auto bigint_cast = - make_uniq(LogicalType::BIGINT, std::move(power_of_term)); - auto subcategory_colref = make_uniq( - tableref->discriminator, element->variable_binding); - vector> and_children; - and_children.push_back(std::move(subcategory_colref)); - and_children.push_back(std::move(bigint_cast)); - - auto and_expression = - make_uniq("&", std::move(and_children)); - - auto constant_expression_idx_label_comparison = make_uniq( - Value::INTEGER(static_cast(idx_of_element + 1))); - - auto subset_compare = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(and_expression), - std::move(constant_expression_idx_label_comparison)); - conditions.push_back(std::move(subset_compare)); -} - -void PGQMatchFunction::CheckEdgeTableConstraints( - const string &src_reference, const string &dst_reference, - const shared_ptr &edge_table) { - if (src_reference != edge_table->source_reference) { - throw BinderException("Label %s is not registered as a source reference " - "for edge pattern of table %s", - src_reference, edge_table->table_name); - } - if (dst_reference != edge_table->destination_reference) { - throw BinderException("Label %s is not registered as a destination " - "reference for edge pattern of table %s", - src_reference, edge_table->table_name); - } -} - -unique_ptr PGQMatchFunction::CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, 
- const string &vertex_alias, const string &edge_alias) { - vector> conditions; - - if (vertex_keys.size() != edge_keys.size()) { - throw BinderException("Vertex columns and edge columns size mismatch"); - } - for (idx_t i = 0; i < vertex_keys.size(); i++) { - auto vertex_colref = - make_uniq(vertex_keys[i], vertex_alias); - auto edge_colref = make_uniq(edge_keys[i], edge_alias); - conditions.push_back(make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), - std::move(edge_colref))); - } - unique_ptr where_clause; - - for (auto &condition : conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - - return where_clause; -} - -PathElement *PGQMatchFunction::GetPathElement( - const unique_ptr &path_reference) { - if (path_reference->path_reference_type == - PGQPathReferenceType::PATH_ELEMENT) { - return reinterpret_cast(path_reference.get()); - } if (path_reference->path_reference_type == - PGQPathReferenceType::SUBPATH) { - return nullptr; + return where_clause; } - throw InternalException("Unknown path reference type detected"); - -} - -unique_ptr -PGQMatchFunction::GetCountTable(const shared_ptr &edge_table, - const string &prev_binding) { - // SELECT count(s.id) FROM src s - auto select_count = make_uniq(); - auto select_inner = make_uniq(); - auto ref = make_uniq(); - - ref->table_name = edge_table->source_reference; - ref->alias = prev_binding; - select_inner->from_table = std::move(ref); - vector> children; - children.push_back( - make_uniq(edge_table->source_pk[0], prev_binding)); - - auto count_function = - make_uniq("count", std::move(children)); - select_inner->select_list.push_back(std::move(count_function)); - select_count->node = std::move(select_inner); - auto result = make_uniq(); - result->subquery = std::move(select_count); - result->subquery_type = SubqueryType::SCALAR; - return 
result; -} - -unique_ptr -PGQMatchFunction::GetJoinRef(const shared_ptr &edge_table, - const string &edge_binding, - const string &prev_binding, - const string &next_binding) { - auto first_join_ref = make_uniq(JoinRefType::REGULAR); - first_join_ref->type = JoinType::INNER; - - auto second_join_ref = make_uniq(JoinRefType::REGULAR); - second_join_ref->type = JoinType::INNER; - - auto edge_base_ref = make_uniq(); - edge_base_ref->table_name = edge_table->table_name; - edge_base_ref->alias = edge_binding; - auto src_base_ref = make_uniq(); - src_base_ref->table_name = edge_table->source_reference; - src_base_ref->alias = prev_binding; - second_join_ref->left = std::move(edge_base_ref); - second_join_ref->right = std::move(src_base_ref); - auto t_from_ref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto src_cid_ref = - make_uniq(edge_table->source_pk[0], prev_binding); - second_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), - std::move(src_cid_ref)); - auto dst_base_ref = make_uniq(); - dst_base_ref->table_name = edge_table->destination_reference; - dst_base_ref->alias = next_binding; - first_join_ref->left = std::move(second_join_ref); - first_join_ref->right = std::move(dst_base_ref); - - auto t_to_ref = make_uniq(edge_table->destination_fk[0], - edge_binding); - auto dst_cid_ref = make_uniq( - edge_table->destination_pk[0], next_binding); - first_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), - std::move(dst_cid_ref)); - return first_join_ref; -} - -unique_ptr PGQMatchFunction::CreateCountCTESubquery() { - //! 
BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x - - auto temp_cte_select_node = make_uniq(); - - auto cte_table_ref = make_uniq(); - - cte_table_ref->table_name = "cte1"; - temp_cte_select_node->from_table = std::move(cte_table_ref); - vector> children; - children.push_back(make_uniq("temp", "cte1")); - - auto count_function = - make_uniq("count", std::move(children)); - - auto zero = make_uniq(Value::INTEGER((int32_t)0)); - - vector> multiply_children; - - multiply_children.push_back(std::move(zero)); - multiply_children.push_back(std::move(count_function)); - auto multiply_function = - make_uniq("multiply", std::move(multiply_children)); - multiply_function->alias = "temp"; - temp_cte_select_node->select_list.push_back(std::move(multiply_function)); - auto temp_cte_select_statement = make_uniq(); - temp_cte_select_statement->node = std::move(temp_cte_select_node); - - auto temp_cte_select_subquery = - make_uniq(std::move(temp_cte_select_statement), "__x"); - //! END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x - return temp_cte_select_subquery; -} - -unique_ptr -PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, - const string &prev_binding, - const string &edge_binding, - const string &next_binding) { - auto csr_edge_id_constant = - make_uniq(Value::INTEGER(0)); - auto count_create_edge_select = GetCountTable(edge_table, prev_binding); - - auto cast_subquery_expr = make_uniq(); - auto cast_select_node = make_uniq(); - - vector> csr_vertex_children; - csr_vertex_children.push_back( - make_uniq(Value::INTEGER(0))); - - auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); - - csr_vertex_children.push_back(std::move(count_create_vertex_expr)); - - csr_vertex_children.push_back( - make_uniq("dense_id", "sub")); - csr_vertex_children.push_back(make_uniq("cnt", "sub")); - - auto create_vertex_function = make_uniq( - "create_csr_vertex", std::move(csr_vertex_children)); - vector> sum_children; - 
sum_children.push_back(std::move(create_vertex_function)); - auto sum_function = - make_uniq("sum", std::move(sum_children)); - - auto inner_select_statement = make_uniq(); - auto inner_select_node = make_uniq(); - - auto source_rowid_colref = - make_uniq("rowid", prev_binding); - source_rowid_colref->alias = "dense_id"; - - auto count_create_inner_expr = make_uniq(); - count_create_inner_expr->subquery_type = SubqueryType::SCALAR; - auto edge_src_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - vector> inner_count_children; - inner_count_children.push_back(std::move(edge_src_colref)); - auto inner_count_function = - make_uniq("count", std::move(inner_count_children)); - inner_count_function->alias = "cnt"; - - inner_select_node->select_list.push_back(std::move(source_rowid_colref)); - inner_select_node->select_list.push_back(std::move(inner_count_function)); - auto source_rowid_colref_1 = - make_uniq("rowid", prev_binding); - expression_map_t grouping_expression_map; - inner_select_node->groups.group_expressions.push_back( - std::move(source_rowid_colref_1)); - GroupingSet grouping_set = {0}; - inner_select_node->groups.grouping_sets.push_back(grouping_set); - - auto inner_join_ref = make_uniq(JoinRefType::REGULAR); - inner_join_ref->type = JoinType::LEFT; - auto left_base_ref = make_uniq(); - left_base_ref->table_name = edge_table->source_reference; - left_base_ref->alias = prev_binding; - auto right_base_ref = make_uniq(); - right_base_ref->table_name = edge_table->table_name; - right_base_ref->alias = edge_binding; - inner_join_ref->left = std::move(left_base_ref); - inner_join_ref->right = std::move(right_base_ref); - - auto edge_join_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto vertex_join_colref = - make_uniq(edge_table->source_pk[0], prev_binding); - - inner_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), - std::move(vertex_join_colref)); - inner_select_node->from_table = 
std::move(inner_join_ref); - inner_select_statement->node = std::move(inner_select_node); - - auto inner_from_subquery = - make_uniq(std::move(inner_select_statement), "sub"); - - cast_select_node->from_table = std::move(inner_from_subquery); - - cast_select_node->select_list.push_back(std::move(sum_function)); - auto cast_select_stmt = make_uniq(); - cast_select_stmt->node = std::move(cast_select_node); - cast_subquery_expr->subquery = std::move(cast_select_stmt); - cast_subquery_expr->subquery_type = SubqueryType::SCALAR; - - auto src_rowid_colref = make_uniq("rowid", prev_binding); - auto dst_rowid_colref = make_uniq("rowid", next_binding); - auto edge_rowid_colref = - make_uniq("rowid", edge_binding); - - auto cast_expression = make_uniq( - LogicalType::BIGINT, std::move(cast_subquery_expr)); - - vector> csr_edge_children; - csr_edge_children.push_back(std::move(csr_edge_id_constant)); - csr_edge_children.push_back(std::move(count_create_edge_select)); - csr_edge_children.push_back(std::move(cast_expression)); - csr_edge_children.push_back(std::move(src_rowid_colref)); - csr_edge_children.push_back(std::move(dst_rowid_colref)); - csr_edge_children.push_back(std::move(edge_rowid_colref)); - - auto outer_select_node = make_uniq(); - - auto create_csr_edge_function = make_uniq( - "create_csr_edge", std::move(csr_edge_children)); - create_csr_edge_function->alias = "temp"; - - outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); - outer_select_node->from_table = - GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); - auto outer_select_statement = make_uniq(); - - outer_select_statement->node = std::move(outer_select_node); - auto info = make_uniq(); - info->query = std::move(outer_select_statement); - return info; -} - -void PGQMatchFunction::EdgeTypeAny( - const shared_ptr &edge_table, const string &edge_binding, - const string &prev_binding, const string &next_binding, - vector> &conditions) { - // (a) src.key = edge.src - 
auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, - prev_binding, edge_binding); - // (b) dst.key = edge.dst - auto dst_left_expr = CreateMatchJoinExpression( - edge_table->destination_pk,edge_table->destination_fk, - next_binding, edge_binding); - // (a) AND (b) - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - // (c) src.key = edge.dst - auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - // (d) dst.key = edge.src - auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->source_fk, - next_binding, edge_binding); - // (c) AND (d) - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - // ((a) AND (b)) OR ((c) AND (d)) - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); -} - -void PGQMatchFunction::EdgeTypeLeft( - const shared_ptr &edge_table, const string &next_table_name, - const string &prev_table_name, const string &edge_binding, - const string &prev_binding, const string &next_binding, - vector> &conditions) { - CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - next_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding)); -} - -void PGQMatchFunction::EdgeTypeRight( - const shared_ptr &edge_table, const string &next_table_name, - const string &prev_table_name, const string &edge_binding, - const string &prev_binding, const string &next_binding, - vector> &conditions) { - 
CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - prev_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - next_binding, edge_binding)); -} - -void PGQMatchFunction::EdgeTypeLeftRight( - const shared_ptr &edge_table, const string &edge_binding, - const string &prev_binding, const string &next_binding, - vector> &conditions, - unordered_map &alias_map, int32_t &extra_alias_counter) { - auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); - auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - - const auto additional_edge_alias = - edge_binding + std::to_string(extra_alias_counter); - extra_alias_counter++; - - alias_map[additional_edge_alias] = edge_table->table_name; - - auto src_right_expr = - CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, - prev_binding, additional_edge_alias); - auto dst_right_expr = CreateMatchJoinExpression( - edge_table->destination_pk, edge_table->destination_fk, next_binding, - additional_edge_alias); - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); -} - -PathElement *PGQMatchFunction::HandleNestedSubPath( - unique_ptr &path_reference, - vector> &conditions, idx_t element_idx) { - auto subpath = reinterpret_cast(path_reference.get()); - return 
GetPathElement(subpath->path_list[element_idx]); -} - -unique_ptr -CreateWhereClause(vector> &conditions) { - unique_ptr where_clause; - for (auto &condition : conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - return where_clause; -} - -unique_ptr PGQMatchFunction::CreatePathFindingFunction( - const string &prev_binding, const string &next_binding, - const shared_ptr &edge_table, - const string &path_finding_udf) { - auto src_row_id = make_uniq("rowid", prev_binding); - auto dst_row_id = make_uniq("rowid", next_binding); - auto csr_id = make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back( - std::move(GetCountTable(edge_table, prev_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - return make_uniq(path_finding_udf, - std::move(pathfinding_children)); -} - -// void UnnestSubpath(const unique_ptr &subpath, -// vector> &conditions, -// unique_ptr &from_clause) { -// const auto path_element = -// reinterpret_cast(subpath.get()); -// std::cout << path_element->path_variable; -// -// } - -void PGQMatchFunction::AddEdgeJoins(const unique_ptr &select_node, - const shared_ptr &edge_table, - const shared_ptr &previous_vertex_table, - const shared_ptr &next_vertex_table, - PGQMatchType edge_type, - const string &edge_binding, - const string &prev_binding, - const string &next_binding, - vector> &conditions, - unordered_map &alias_map, - int32_t &extra_alias_counter) { - switch (edge_type) { - case PGQMatchType::MATCH_EDGE_ANY: { - select_node->modifiers.push_back(make_uniq()); - EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions); - break; + + PathElement* PGQMatchFunction::GetPathElement( + const unique_ptr& 
path_reference) { + if (path_reference->path_reference_type == + PGQPathReferenceType::PATH_ELEMENT) { + return reinterpret_cast(path_reference.get()); + } + if (path_reference->path_reference_type == + PGQPathReferenceType::SUBPATH) { + return nullptr; } - case PGQMatchType::MATCH_EDGE_LEFT: - EdgeTypeLeft(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_RIGHT: - EdgeTypeRight(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - EdgeTypeLeftRight(edge_table, edge_binding, - prev_binding, next_binding, conditions, - alias_map, extra_alias_counter); - break; + throw InternalException("Unknown path reference type detected"); + } + + unique_ptr + PGQMatchFunction::GetCountTable(const shared_ptr& edge_table, + const string& prev_binding) { + // SELECT count(s.id) FROM src s + auto select_count = make_uniq(); + auto select_inner = make_uniq(); + auto ref = make_uniq(); + + ref->table_name = edge_table->source_reference; + ref->alias = prev_binding; + select_inner->from_table = std::move(ref); + vector> children; + children.push_back( + make_uniq(edge_table->source_pk[0], prev_binding)); + + auto count_function = + make_uniq("count", std::move(children)); + select_inner->select_list.push_back(std::move(count_function)); + select_count->node = std::move(select_inner); + auto result = make_uniq(); + result->subquery = std::move(select_count); + result->subquery_type = SubqueryType::SCALAR; + return result; + } + + unique_ptr + PGQMatchFunction::GetJoinRef(const shared_ptr& edge_table, + const string& edge_binding, + const string& prev_binding, + const string& next_binding) { + auto first_join_ref = make_uniq(JoinRefType::REGULAR); + first_join_ref->type = JoinType::INNER; + + auto second_join_ref = 
make_uniq(JoinRefType::REGULAR); + second_join_ref->type = JoinType::INNER; + + auto edge_base_ref = make_uniq(); + edge_base_ref->table_name = edge_table->table_name; + edge_base_ref->alias = edge_binding; + auto src_base_ref = make_uniq(); + src_base_ref->table_name = edge_table->source_reference; + src_base_ref->alias = prev_binding; + second_join_ref->left = std::move(edge_base_ref); + second_join_ref->right = std::move(src_base_ref); + auto t_from_ref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto src_cid_ref = + make_uniq(edge_table->source_pk[0], prev_binding); + second_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), + std::move(src_cid_ref)); + auto dst_base_ref = make_uniq(); + dst_base_ref->table_name = edge_table->destination_reference; + dst_base_ref->alias = next_binding; + first_join_ref->left = std::move(second_join_ref); + first_join_ref->right = std::move(dst_base_ref); + + auto t_to_ref = make_uniq(edge_table->destination_fk[0], + edge_binding); + auto dst_cid_ref = make_uniq( + edge_table->destination_pk[0], next_binding); + first_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), + std::move(dst_cid_ref)); + return first_join_ref; + } + + unique_ptr PGQMatchFunction::CreateCountCTESubquery() { + //! 
BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x + + auto temp_cte_select_node = make_uniq(); + + auto cte_table_ref = make_uniq(); + + cte_table_ref->table_name = "cte1"; + temp_cte_select_node->from_table = std::move(cte_table_ref); + vector> children; + children.push_back(make_uniq("temp", "cte1")); + + auto count_function = + make_uniq("count", std::move(children)); + + auto zero = make_uniq(Value::INTEGER((int32_t) 0)); + + vector> multiply_children; + + multiply_children.push_back(std::move(zero)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = + make_uniq("multiply", std::move(multiply_children)); + multiply_function->alias = "temp"; + temp_cte_select_node->select_list.push_back(std::move(multiply_function)); + auto temp_cte_select_statement = make_uniq(); + temp_cte_select_statement->node = std::move(temp_cte_select_node); + + auto temp_cte_select_subquery = + make_uniq(std::move(temp_cte_select_statement), "__x"); + //! END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x + return temp_cte_select_subquery; + } + + unique_ptr + PGQMatchFunction::CreateCSRCTE(const shared_ptr& edge_table, + const string& prev_binding, + const string& edge_binding, + const string& next_binding) { + auto csr_edge_id_constant = + make_uniq(Value::INTEGER(0)); + auto count_create_edge_select = GetCountTable(edge_table, prev_binding); + + auto cast_subquery_expr = make_uniq(); + auto cast_select_node = make_uniq(); + + vector> csr_vertex_children; + csr_vertex_children.push_back( + make_uniq(Value::INTEGER(0))); + + auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); + + csr_vertex_children.push_back(std::move(count_create_vertex_expr)); + + csr_vertex_children.push_back( + make_uniq("dense_id", "sub")); + csr_vertex_children.push_back(make_uniq("cnt", "sub")); + + auto create_vertex_function = make_uniq( + "create_csr_vertex", std::move(csr_vertex_children)); + vector> sum_children; + 
sum_children.push_back(std::move(create_vertex_function)); + auto sum_function = + make_uniq("sum", std::move(sum_children)); + + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + auto source_rowid_colref = + make_uniq("rowid", prev_binding); + source_rowid_colref->alias = "dense_id"; + + auto count_create_inner_expr = make_uniq(); + count_create_inner_expr->subquery_type = SubqueryType::SCALAR; + auto edge_src_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + vector> inner_count_children; + inner_count_children.push_back(std::move(edge_src_colref)); + auto inner_count_function = + make_uniq("count", std::move(inner_count_children)); + inner_count_function->alias = "cnt"; + + inner_select_node->select_list.push_back(std::move(source_rowid_colref)); + inner_select_node->select_list.push_back(std::move(inner_count_function)); + auto source_rowid_colref_1 = + make_uniq("rowid", prev_binding); + expression_map_t grouping_expression_map; + inner_select_node->groups.group_expressions.push_back( + std::move(source_rowid_colref_1)); + GroupingSet grouping_set = {0}; + inner_select_node->groups.grouping_sets.push_back(grouping_set); + + auto inner_join_ref = make_uniq(JoinRefType::REGULAR); + inner_join_ref->type = JoinType::LEFT; + auto left_base_ref = make_uniq(); + left_base_ref->table_name = edge_table->source_reference; + left_base_ref->alias = prev_binding; + auto right_base_ref = make_uniq(); + right_base_ref->table_name = edge_table->table_name; + right_base_ref->alias = edge_binding; + inner_join_ref->left = std::move(left_base_ref); + inner_join_ref->right = std::move(right_base_ref); + + auto edge_join_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto vertex_join_colref = + make_uniq(edge_table->source_pk[0], prev_binding); + + inner_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), + std::move(vertex_join_colref)); + inner_select_node->from_table = 
std::move(inner_join_ref); + inner_select_statement->node = std::move(inner_select_node); + + auto inner_from_subquery = + make_uniq(std::move(inner_select_statement), "sub"); + + cast_select_node->from_table = std::move(inner_from_subquery); + + cast_select_node->select_list.push_back(std::move(sum_function)); + auto cast_select_stmt = make_uniq(); + cast_select_stmt->node = std::move(cast_select_node); + cast_subquery_expr->subquery = std::move(cast_select_stmt); + cast_subquery_expr->subquery_type = SubqueryType::SCALAR; + + auto src_rowid_colref = make_uniq("rowid", prev_binding); + auto dst_rowid_colref = make_uniq("rowid", next_binding); + auto edge_rowid_colref = + make_uniq("rowid", edge_binding); + + auto cast_expression = make_uniq( + LogicalType::BIGINT, std::move(cast_subquery_expr)); + + vector> csr_edge_children; + csr_edge_children.push_back(std::move(csr_edge_id_constant)); + csr_edge_children.push_back(std::move(count_create_edge_select)); + csr_edge_children.push_back(std::move(cast_expression)); + csr_edge_children.push_back(std::move(src_rowid_colref)); + csr_edge_children.push_back(std::move(dst_rowid_colref)); + csr_edge_children.push_back(std::move(edge_rowid_colref)); + + auto outer_select_node = make_uniq(); + + auto create_csr_edge_function = make_uniq( + "create_csr_edge", std::move(csr_edge_children)); + create_csr_edge_function->alias = "temp"; + + outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); + outer_select_node->from_table = + GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); + auto outer_select_statement = make_uniq(); + + outer_select_statement->node = std::move(outer_select_node); + auto info = make_uniq(); + info->query = std::move(outer_select_statement); + return info; + } + + void PGQMatchFunction::EdgeTypeAny( + const shared_ptr& edge_table, const string& edge_binding, + const string& prev_binding, const string& next_binding, + vector>& conditions) { + // (a) src.key = edge.src 
+ auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, + prev_binding, edge_binding); + // (b) dst.key = edge.dst + auto dst_left_expr = CreateMatchJoinExpression( + edge_table->destination_pk, edge_table->destination_fk, + next_binding, edge_binding); + // (a) AND (b) + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); + // (c) src.key = edge.dst + auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + // (d) dst.key = edge.src + auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->source_fk, + next_binding, edge_binding); + // (c) AND (d) + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + // ((a) AND (b)) OR ((c) AND (d)) + auto combined_expr = make_uniq( + ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); + } + + void PGQMatchFunction::EdgeTypeLeft( + const shared_ptr& edge_table, const string& next_table_name, + const string& prev_table_name, const string& edge_binding, + const string& prev_binding, const string& next_binding, + vector>& conditions) { + CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + next_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding)); + } + + void PGQMatchFunction::EdgeTypeRight( + const shared_ptr& edge_table, const string& next_table_name, + const string& prev_table_name, const string& edge_binding, + const string& prev_binding, const string& next_binding, + vector>& conditions) { + 
CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + prev_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + next_binding, edge_binding)); + } + + void PGQMatchFunction::EdgeTypeLeftRight( + const shared_ptr& edge_table, const string& edge_binding, + const string& prev_binding, const string& next_binding, + vector>& conditions, + unordered_map& alias_map, int32_t& extra_alias_counter) { + auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); + auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); + + const auto additional_edge_alias = + edge_binding + std::to_string(extra_alias_counter); + extra_alias_counter++; + + alias_map[additional_edge_alias] = edge_table->table_name; + + auto src_right_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, + prev_binding, additional_edge_alias); + auto dst_right_expr = CreateMatchJoinExpression( + edge_table->destination_pk, edge_table->destination_fk, next_binding, + additional_edge_alias); + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + + auto combined_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); + } + + PathElement* PGQMatchFunction::HandleNestedSubPath( + unique_ptr& path_reference, + vector>& conditions, idx_t element_idx) { + auto subpath = reinterpret_cast(path_reference.get()); + return 
GetPathElement(subpath->path_list[element_idx]); + } + + unique_ptr + CreateWhereClause(vector>& conditions) { + unique_ptr where_clause; + for (auto& condition: conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } } - default: - throw InternalException("Unknown match type found"); + return where_clause; } -} - -void PGQMatchFunction::AddPathFinding(const unique_ptr &select_node, - unique_ptr &from_clause, - vector> &conditions, - const string &prev_binding, const string &edge_binding, const string &next_binding, - const shared_ptr &edge_table, - const SubPath* subpath) { - //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - select_node->cte_map.map["cte1"] = CreateCSRCTE( - edge_table, prev_binding, - edge_binding, - next_binding); - // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); - - //! src alias (FROM src a) - // auto src_vertex_ref = make_uniq(); - // src_vertex_ref->table_name = edge_table->source_reference; - // src_vertex_ref->alias = prev_binding; - - - // cross_join_src_dst->left = std::move(src_vertex_ref); - - //! dst alias (FROM dst b) - // auto dst_vertex_ref = make_uniq(); - // dst_vertex_ref->table_name = edge_table->destination_reference; - // dst_vertex_ref->alias = next_binding; - - // cross_join_src_dst->right = std::move(dst_vertex_ref); - - //! 
(SELECT count(cte1.temp) * 0 as temp from cte1) __x - // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - // cross_join_with_cte->left = std::move(temp_cte_select_subquery); - // cross_join_with_cte->right = std::move(cross_join_src_dst); - - auto temp_cte_select_subquery = CreateCountCTESubquery(); - // from_clause = std::move(temp_cte_select_subquery); - - if (from_clause) { - // create a cross join since there is already something in the - // from clause - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(from_clause); - from_join->right = std::move(temp_cte_select_subquery); - from_clause = std::move(from_join); - } else { - from_clause = std::move(temp_cte_select_subquery); + + unique_ptr PGQMatchFunction::CreatePathFindingFunction( + vector> &path_list) { + // This method will return a SubqueryRef of a list of rowids + // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend + // The difficulty is that there may be a (un)bounded path pattern at some point in the query + // This is computed using the shortestpath() UDF and returns a list. + // This list will be part of the full list of element rowids, using list_concat. 
+ // For now we will only support returning rowids + unique_ptr final_list; + + auto previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + // We hit a vertex element with a WHERE, but we only care about the rowid here + auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); + } + + for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i=idx_i+2) { + auto edge_element = GetPathElement(path_list[idx_i]); + if (!edge_element) { + auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); + if (edge_subpath->upper > 1) { + throw NotImplementedException("Returning the path list for (un)bounded paths has not yet been implemented."); + // (un)bounded shortest path + // Add the shortest path UDF + + // Set next vertex to be previous + continue; + } + } + + auto next_vertex_element = GetPathElement(path_list[idx_i+1]); + if (!next_vertex_element) { + auto next_vertex_subpath = reinterpret_cast(path_list[idx_i+1].get()); + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + + if (!final_list) { + auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); + auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); + auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); + auto starting_list_children = vector>(); + starting_list_children.push_back(std::move(previous_rowid)); + auto starting_list = make_uniq("list", std::move(starting_list_children)); + auto list_append_children = vector>(); + list_append_children.push_back(std::move(final_list)); + list_append_children.push_back(std::move(edge_rowid)); + auto list_append_ = make_uniq("list_append", std::move(list_append_children)); + + list_append_children = vector>(); + list_append_children.push_back(std::move(list_append_)); + list_append_children.push_back(std::move(next_rowid)); + final_list = 
make_uniq("list_append", std::move(list_append_children)); + } + } + + return final_list; + + // auto src_row_id = make_uniq("rowid", prev_binding); + // auto dst_row_id = make_uniq("rowid", next_binding); + // auto csr_id = make_uniq(Value::INTEGER(0)); + // + // vector> pathfinding_children; + // pathfinding_children.push_back(std::move(csr_id)); + // pathfinding_children.push_back( + // std::move(GetCountTable(edge_table, prev_binding))); + // pathfinding_children.push_back(std::move(src_row_id)); + // pathfinding_children.push_back(std::move(dst_row_id)); + // + // return make_uniq(path_finding_udf, + // std::move(pathfinding_children)); } - //! END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - - //! START - //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) - //! from dst c, a.rowid, b.rowid) between lower and upper - - auto src_row_id = make_uniq( - "rowid", prev_binding); - auto dst_row_id = make_uniq( - "rowid", next_binding); - auto csr_id = - make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back(std::move(GetCountTable( - edge_table, prev_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto reachability_function = make_uniq( - "iterativelength", std::move(pathfinding_children)); - - auto cte_col_ref = make_uniq("temp", "__x"); - - vector> addition_children; - addition_children.push_back(std::move(cte_col_ref)); - addition_children.push_back(std::move(reachability_function)); - - auto addition_function = make_uniq( - "add", std::move(addition_children)); - auto lower_limit = - make_uniq(Value::INTEGER(static_cast(subpath->lower))); - auto upper_limit = - make_uniq(Value::INTEGER(static_cast(subpath->upper))); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - 
conditions.push_back(std::move(between_expression)); - - //! END - //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! from src s, a.rowid, b.rowid) between lower and upper -} - - -unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext &context, - TableFunctionBindInput &) { - auto duckpgq_state_entry = context.registered_state.find("duckpgq"); - auto duckpgq_state = dynamic_cast(duckpgq_state_entry->second.get()); - - auto ref = dynamic_cast( - duckpgq_state->transform_expression.get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); - - auto data = make_uniq(); - - vector> conditions; - - auto select_node = make_uniq(); - unordered_map alias_map; - unique_ptr from_clause; - - int32_t extra_alias_counter = 0; - for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { - auto &path_pattern = ref->path_patterns[idx_i]; - // Check if the element is PathElement or a Subpath with potentially many - // items - PathElement *previous_vertex_element = - GetPathElement(path_pattern->path_elements[0]); - if (!previous_vertex_element) { - // todo(dtenwolde) this will be hit with MATCH o = . Or with a WHERE. 
- auto previous_vertex_subpath = reinterpret_cast(path_pattern->path_elements[0].get()); - if (previous_vertex_subpath->where_clause) { - conditions.push_back(std::move(previous_vertex_subpath->where_clause)); - } - if (previous_vertex_subpath->path_list.size() == 1) { + + void PGQMatchFunction::AddEdgeJoins(const unique_ptr& select_node, + const shared_ptr& edge_table, + const shared_ptr& previous_vertex_table, + const shared_ptr& next_vertex_table, + PGQMatchType edge_type, + const string& edge_binding, + const string& prev_binding, + const string& next_binding, + vector>& conditions, + unordered_map& alias_map, + int32_t& extra_alias_counter) { + switch (edge_type) { + case PGQMatchType::MATCH_EDGE_ANY: { + select_node->modifiers.push_back(make_uniq()); + EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions); + break; + } + case PGQMatchType::MATCH_EDGE_LEFT: + EdgeTypeLeft(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, + edge_binding, prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_RIGHT: + EdgeTypeRight(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, + edge_binding, prev_binding, next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + EdgeTypeLeftRight(edge_table, edge_binding, + prev_binding, next_binding, conditions, + alias_map, extra_alias_counter); + break; + } + default: + throw InternalException("Unknown match type found"); + } + } + + void PGQMatchFunction::AddPathFinding(const unique_ptr& select_node, + unique_ptr& from_clause, + vector>& conditions, + const string& prev_binding, const string& edge_binding, + const string& next_binding, + const shared_ptr& edge_table, + const SubPath* subpath) { + //! START + //! 
FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + select_node->cte_map.map["cte1"] = CreateCSRCTE( + edge_table, prev_binding, + edge_binding, + next_binding); + // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); + + //! src alias (FROM src a) + // auto src_vertex_ref = make_uniq(); + // src_vertex_ref->table_name = edge_table->source_reference; + // src_vertex_ref->alias = prev_binding; + + + // cross_join_src_dst->left = std::move(src_vertex_ref); + + //! dst alias (FROM dst b) + // auto dst_vertex_ref = make_uniq(); + // dst_vertex_ref->table_name = edge_table->destination_reference; + // dst_vertex_ref->alias = next_binding; + + // cross_join_src_dst->right = std::move(dst_vertex_ref); + + //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x + // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); + // cross_join_with_cte->left = std::move(temp_cte_select_subquery); + // cross_join_with_cte->right = std::move(cross_join_src_dst); + + auto temp_cte_select_subquery = CreateCountCTESubquery(); + // from_clause = std::move(temp_cte_select_subquery); + + if (from_clause) { + // create a cross join since there is already something in the + // from clause + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(from_clause); + from_join->right = std::move(temp_cte_select_subquery); + from_clause = std::move(from_join); + } else { + from_clause = std::move(temp_cte_select_subquery); + } + //! END + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + + //! START + //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) + //! 
from dst c, a.rowid, b.rowid) between lower and upper + + auto src_row_id = make_uniq( + "rowid", prev_binding); + auto dst_row_id = make_uniq( + "rowid", next_binding); + auto csr_id = + make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back(std::move(GetCountTable( + edge_table, prev_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto reachability_function = make_uniq( + "iterativelength", std::move(pathfinding_children)); + + auto cte_col_ref = make_uniq("temp", "__x"); + + vector> addition_children; + addition_children.push_back(std::move(cte_col_ref)); + addition_children.push_back(std::move(reachability_function)); + + auto addition_function = make_uniq( + "add", std::move(addition_children)); + auto lower_limit = + make_uniq(Value::INTEGER(static_cast(subpath->lower))); + auto upper_limit = + make_uniq(Value::INTEGER(static_cast(subpath->upper))); + auto between_expression = make_uniq( + std::move(addition_function), std::move(lower_limit), + std::move(upper_limit)); + conditions.push_back(std::move(between_expression)); + + //! END + //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) + //! 
from src s, a.rowid, b.rowid) between lower and upper + } + + bool PGQMatchFunction::CheckNamedSubpath(string subpath_name, vector> &column_list) { + for (unique_ptr &column : column_list) { + ColumnRefExpression* column_ref = dynamic_cast(column.get()); + if (column_ref == nullptr) { + continue; + } + if (column_ref->column_names[0] == subpath_name) { + return true; + } + } + return false; + } + + void PGQMatchFunction::ProcessPathList(vector>& path_list, + vector>& conditions, + unique_ptr& from_clause, unique_ptr& select_node, + unordered_map& alias_map, + CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, + vector>& column_list) { + PathElement* previous_vertex_element = + GetPathElement(path_list[0]); + if (!previous_vertex_element) { + // todo(dtenwolde) handle named subpaths. + const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + if (previous_vertex_subpath->where_clause) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (previous_vertex_subpath->path_list.size() == 1) { previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); } else { - throw NotImplementedException("Named subpaths are not yet supported."); + // Add the shortest path if the name is found in the column_list + if (CheckNamedSubpath(previous_vertex_subpath->path_variable, column_list)) { + column_list.push_back(CreatePathFindingFunction(previous_vertex_subpath->path_list)); + } + ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, + alias_map, pg_table, extra_alias_counter, column_list); + return; } - // UnnestSubpath(path_pattern->path_elements[0], conditions, from_clause); - -// auto subpath_pattern_subquery = GenerateSubpathPatternSubquery( -// path_pattern, pg_table, ref->column_list, named_subpaths); -// if (from_clause) { -// // The from clause already contains TableRefs, so we need to make a join -// // with the subquery -// auto from_join = 
make_uniq(JoinRefType::CROSS); -// from_join->left = std::move(from_clause); -// from_join->right = std::move(subpath_pattern_subquery); -// from_clause = std::move(from_join); -// } else { -// // The from clause was still empty, so we can just place the subquery -// // there -// from_clause = std::move(subpath_pattern_subquery); -// } - } - auto previous_vertex_table = - FindGraphTable(previous_vertex_element->label, *pg_table); - CheckInheritance(previous_vertex_table, previous_vertex_element, - conditions); - alias_map[previous_vertex_element->variable_binding] = - previous_vertex_table->table_name; - - for (idx_t idx_j = 1; - idx_j < ref->path_patterns[idx_i]->path_elements.size(); - idx_j = idx_j + 2) { - PathElement *next_vertex_element = - GetPathElement(path_pattern->path_elements[idx_j + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = - reinterpret_cast(path_pattern->path_elements[idx_j + 1].get()); + } + auto previous_vertex_table = + FindGraphTable(previous_vertex_element->label, pg_table); + CheckInheritance(previous_vertex_table, previous_vertex_element, + conditions); + alias_map[previous_vertex_element->variable_binding] = + previous_vertex_table->table_name; + + for (idx_t idx_j = 1; + idx_j < path_list.size(); + idx_j = idx_j + 2) { + PathElement* next_vertex_element = + GetPathElement(path_list[idx_j + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_list[idx_j + 1].get()); if (next_vertex_subpath->path_list.size() > 1) { throw NotImplementedException("Recursive patterns are not yet supported."); } - // Check the size of the subpath path list - // if size == 1: - // Path Element with a WHERE - // (){3} Repeated vertices are not supported - // Else: - // Unsure if this is possible to reach. Perhaps at some point with a nested pattern? 
- // Will be unsupported for now - if (next_vertex_subpath->where_clause) { - conditions.push_back(std::move(next_vertex_subpath->where_clause)); - } - next_vertex_element = + // Check the size of the subpath path list + // if size == 1: + // Path Element with a WHERE + // (){3} Repeated vertices are not supported + // Else: + // Unsure if this is possible to reach. Perhaps at some point with a nested pattern? + // Will be unsupported for now + if (next_vertex_subpath->where_clause) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); - } - if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - throw BinderException("Vertex and edge patterns must be alternated."); - } - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, *pg_table); - CheckInheritance(next_vertex_table, next_vertex_element, conditions); - alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; - - PathElement *edge_element = - GetPathElement(path_pattern->path_elements[idx_j]); - if (!edge_element) { - // We are dealing with a subpath - auto edge_subpath = reinterpret_cast(path_pattern->path_elements[idx_j].get()); + } + if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + throw BinderException("Vertex and edge patterns must be alternated."); + } + auto next_vertex_table = + FindGraphTable(next_vertex_element->label, pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; + + PathElement* edge_element = + GetPathElement(path_list[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); if 
(edge_subpath->where_clause) { conditions.push_back(std::move(edge_subpath->where_clause)); } - if (edge_subpath->path_list.size() > 1) { - // todo(dtenwolde) deal with multiple elements in subpath - throw NotImplementedException("Subpath with multiple elements is not yet supported."); - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = FindGraphTable(edge_element->label, *pg_table); - if (edge_subpath->upper > 1) { - // Add the path-finding - AddPathFinding(select_node, from_clause, conditions, - previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding, - edge_table, edge_subpath); - } else { - alias_map[edge_element->variable_binding] = edge_table->source_reference; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, - edge_element->variable_binding, previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); - } - } else { - // The edge element is a path element without WHERE or path-finding. 
- auto edge_table = FindGraphTable(edge_element->label, *pg_table); - CheckInheritance(edge_table, edge_element, conditions); - // check aliases - alias_map[edge_element->variable_binding] = edge_table->table_name; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type,edge_element->variable_binding, - previous_vertex_element->variable_binding, next_vertex_element->variable_binding, - conditions, alias_map, extra_alias_counter); - // Check the edge type - // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id - // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id - // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR - // (b.dst = a.id AND b.src - // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND - // (b.dst = a.id AND b.src - //= c.id) - } - previous_vertex_element = next_vertex_element; - previous_vertex_table = next_vertex_table; - } - } - - // Go through all aliases encountered - for (auto &table_alias_entry : alias_map) { - auto table_ref = make_uniq(); - table_ref->table_name = table_alias_entry.second; - table_ref->alias = table_alias_entry.first; + if (edge_subpath->path_list.size() > 1) { + // todo(dtenwolde) deal with multiple elements in subpath + throw NotImplementedException("Subpath on an edge is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + + if (edge_subpath->upper > 1) { + // Add the path-finding + AddPathFinding(select_node, from_clause, conditions, + previous_vertex_element->variable_binding, + edge_element->variable_binding, + next_vertex_element->variable_binding, + edge_table, edge_subpath); + } + else { + alias_map[edge_element->variable_binding] = edge_table->source_reference; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, previous_vertex_element->variable_binding, + 
next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); + } + } else { + // The edge element is a path element without WHERE or path-finding. + auto edge_table = FindGraphTable(edge_element->label, pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + alias_map[edge_element->variable_binding] = edge_table->table_name; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, edge_element->variable_binding, + previous_vertex_element->variable_binding, next_vertex_element->variable_binding, + conditions, alias_map, extra_alias_counter); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR + // (b.dst = a.id AND b.src + // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) + } + previous_vertex_element = next_vertex_element; + previous_vertex_table = next_vertex_table; + } + } - if (from_clause) { - auto new_root = make_uniq(JoinRefType::CROSS); - new_root->left = std::move(from_clause); - new_root->right = std::move(table_ref); - from_clause = std::move(new_root); - } else { - from_clause = std::move(table_ref); + + unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext& context, + TableFunctionBindInput&) { + auto duckpgq_state_entry = context.registered_state.find("duckpgq"); + auto duckpgq_state = dynamic_cast(duckpgq_state_entry->second.get()); + + auto ref = dynamic_cast( + duckpgq_state->transform_expression.get()); + auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + + auto data = make_uniq(); + + vector> conditions; + + auto select_node = make_uniq(); + unordered_map alias_map; + unique_ptr from_clause; + + int32_t extra_alias_counter = 0; + for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { + auto& path_pattern = 
ref->path_patterns[idx_i]; + // Check if the element is PathElement or a Subpath with potentially many items + ProcessPathList(path_pattern->path_elements, conditions, from_clause, select_node, + alias_map, *pg_table, extra_alias_counter, ref->column_list); + } + + // Go through all aliases encountered + for (auto& table_alias_entry: alias_map) { + auto table_ref = make_uniq(); + table_ref->table_name = table_alias_entry.second; + table_ref->alias = table_alias_entry.first; + + if (from_clause) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(from_clause); + new_root->right = std::move(table_ref); + from_clause = std::move(new_root); + } else { + from_clause = std::move(table_ref); + } } + + select_node->from_table = std::move(from_clause); + + if (ref->where_clause) { + conditions.push_back(std::move(ref->where_clause)); + } + std::vector> final_column_list; + + for (auto& expression: ref->column_list) { + unordered_set named_subpaths; + auto column_ref = dynamic_cast(expression.get()); + if (column_ref != nullptr) { + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + final_column_list.emplace_back(make_uniq( + "path", column_ref->column_names[0])); + } else { + final_column_list.push_back(std::move(expression)); + } + continue; + } + auto function_ref = dynamic_cast(expression.get()); + if (function_ref != nullptr) { + if (function_ref->function_name == "path_length") { + column_ref = dynamic_cast( + function_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + auto path_ref = make_uniq( + "path", column_ref->column_names[0]); + vector> path_children; + path_children.push_back(std::move(path_ref)); + auto path_len = + make_uniq("len", std::move(path_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + 
div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto div_expression = + make_uniq("//", std::move(div_children)); + div_expression->alias = + "path_length_" + column_ref->column_names[0]; + final_column_list.emplace_back(std::move(div_expression)); + } + } else { + final_column_list.push_back(std::move(expression)); + } + + continue; + } + + final_column_list.push_back(std::move(expression)); + } + + select_node->where_clause = CreateWhereClause(conditions); + select_node->select_list = std::move(final_column_list); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery), ref->alias); + + return std::move(result); } - select_node->from_table = std::move(from_clause); - - if (ref->where_clause) { - conditions.push_back(std::move(ref->where_clause)); - } - std::vector> final_column_list; - - for (auto &expression : ref->column_list) { - unordered_set named_subpaths; - auto column_ref = dynamic_cast(expression.get()); - if (column_ref != nullptr) { - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - final_column_list.emplace_back(make_uniq( - "path", column_ref->column_names[0])); - } else { - final_column_list.push_back(std::move(expression)); - } - continue; - } - auto function_ref = dynamic_cast(expression.get()); - if (function_ref != nullptr) { - if (function_ref->function_name == "path_length") { - column_ref = dynamic_cast( - function_ref->children[0].get()); - if (column_ref == nullptr) { - continue; - } - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - auto path_ref = make_uniq( - "path", column_ref->column_names[0]); - vector> path_children; - path_children.push_back(std::move(path_ref)); - auto path_len = - make_uniq("len", std::move(path_children)); - auto constant_two = make_uniq(Value::INTEGER(2)); - vector> div_children; - 
div_children.push_back(std::move(path_len)); - div_children.push_back(std::move(constant_two)); - auto div_expression = - make_uniq("//", std::move(div_children)); - div_expression->alias = - "path_length_" + column_ref->column_names[0]; - final_column_list.emplace_back(std::move(div_expression)); - } - } else { - final_column_list.push_back(std::move(expression)); - } - - continue; - } - - final_column_list.push_back(std::move(expression)); - } - - select_node->where_clause = CreateWhereClause(conditions); - select_node->select_list = std::move(final_column_list); - - auto subquery = make_uniq(); - subquery->node = std::move(select_node); - - auto result = make_uniq(std::move(subquery), ref->alias); - - return std::move(result); -} -// -//unique_ptr PGQMatchFunction::GenerateSubpathPatternSubquery( -// unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, -// vector> &column_list, -// unordered_set &named_subpaths) { -// vector> conditions; -// auto path_element = -// reinterpret_cast(path_pattern->path_elements[0].get()); -// auto select_node = make_uniq(); -// unordered_map alias_map; -// string named_subpath = path_element->path_variable; -// named_subpaths.insert(named_subpath); -// int32_t extra_alias_counter = 0; -// bool path_finding = false; -// auto previous_vertex_element = -// GetPathElement(path_element->path_list[0], conditions); -// auto previous_vertex_table = -// FindGraphTable(previous_vertex_element->label, *pg_table); -// CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); -// alias_map[previous_vertex_element->variable_binding] = -// previous_vertex_table->table_name; -// for (idx_t idx_j = 1; idx_j < path_element->path_list.size(); -// idx_j = idx_j + 2) { -// PathElement *edge_element = -// GetPathElement(path_element->path_list[idx_j], conditions); -// PathElement *next_vertex_element = -// GetPathElement(path_element->path_list[idx_j + 1], conditions); -// if (next_vertex_element->match_type != 
PGQMatchType::MATCH_VERTEX || -// previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { -// throw BinderException("Vertex and edge patterns must be alternated."); -// } -// -// auto edge_table = FindGraphTable(edge_element->label, *pg_table); -// CheckInheritance(edge_table, edge_element, conditions); -// auto next_vertex_table = -// FindGraphTable(next_vertex_element->label, *pg_table); -// CheckInheritance(next_vertex_table, next_vertex_element, conditions); -// -// if (path_element->path_list[idx_j]->path_reference_type == -// PGQPathReferenceType::SUBPATH) { -// auto *subpath = -// reinterpret_cast(path_element->path_list[idx_j].get()); -// if (subpath->upper > 1) { -// path_finding = true; -// if (!named_subpath.empty() && path_pattern->shortest) { -// // todo(dtenwolde) does not necessarily have to be a shortest path -// // query if it is a named subpath. It can also be a basic pattern -// // matching that is named. -// auto shortest_path_function = CreatePathFindingFunction( -// previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, edge_table, -// "shortestpath"); -// shortest_path_function->alias = "path"; -// select_node->select_list.push_back(std::move(shortest_path_function)); -// } -// select_node->cte_map.map["cte1"] = -// CreateCSRCTE(edge_table, previous_vertex_element->variable_binding, -// edge_element->variable_binding, -// next_vertex_element->variable_binding); -// -// //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x -// auto temp_cte_select_subquery = CreateCountCTESubquery(); -// -// auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); -// -// //! src alias (FROM src a) -// auto src_vertex_ref = make_uniq(); -// src_vertex_ref->table_name = edge_table->source_reference; -// src_vertex_ref->alias = previous_vertex_element->variable_binding; -// -// cross_join_src_dst->left = std::move(src_vertex_ref); -// -// //! 
dst alias (FROM dst b) -// auto dst_vertex_ref = make_uniq(); -// dst_vertex_ref->table_name = edge_table->destination_reference; -// dst_vertex_ref->alias = next_vertex_element->variable_binding; -// -// cross_join_src_dst->right = std::move(dst_vertex_ref); -// -// auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); -// cross_join_with_cte->left = std::move(temp_cte_select_subquery); -// cross_join_with_cte->right = std::move(cross_join_src_dst); -// -// if (select_node->from_table) { -// // create a cross join since there is already something in the from -// // clause -// auto from_join = make_uniq(JoinRefType::CROSS); -// from_join->left = std::move(select_node->from_table); -// from_join->right = std::move(cross_join_with_cte); -// select_node->from_table = std::move(from_join); -// } else { -// select_node->from_table = std::move(cross_join_with_cte); -// } -// //! END -// //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b -// -// //! START -// //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) from -// //! dst c, a.rowid, b.rowid) between lower and upper -// auto reachability_function = -// CreatePathFindingFunction(previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, -// edge_table, "iterativelength"); -// -// auto cte_col_ref = make_uniq("temp", "__x"); -// -// vector> addition_children; -// addition_children.push_back(std::move(cte_col_ref)); -// addition_children.push_back(std::move(reachability_function)); -// -// auto addition_function = -// make_uniq("add", std::move(addition_children)); -// auto lower_limit = -// make_uniq(Value::BIGINT(subpath->lower)); -// auto upper_limit = -// make_uniq(Value::BIGINT(subpath->upper)); -// auto between_expression = make_uniq( -// std::move(addition_function), std::move(lower_limit), -// std::move(upper_limit)); -// conditions.push_back(std::move(between_expression)); -// -// //! END -// //! 
WHERE __x.temp + iterativelength(, (SELECT count(s.id) from -// //! src s, a.rowid, b.rowid) between lower and upper -// } -// // check aliases -// alias_map[next_vertex_element->variable_binding] = -// next_vertex_table->table_name; -// alias_map[edge_element->variable_binding] = edge_table->table_name; -// if (!path_finding) { -// switch (edge_element->match_type) { -// case PGQMatchType::MATCH_EDGE_ANY: { -// select_node->modifiers.push_back(make_uniq()); -// EdgeTypeAny(edge_table, edge_element->variable_binding, -// previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, conditions); -// break; -// } -// case PGQMatchType::MATCH_EDGE_LEFT: -// EdgeTypeLeft(edge_table, next_vertex_table->table_name, -// previous_vertex_table->table_name, -// edge_element->variable_binding, -// previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, conditions); -// break; -// case PGQMatchType::MATCH_EDGE_RIGHT: -// EdgeTypeRight(edge_table, next_vertex_table->table_name, -// previous_vertex_table->table_name, -// edge_element->variable_binding, -// previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, conditions); -// break; -// case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { -// EdgeTypeLeftRight(edge_table, edge_element->variable_binding, -// previous_vertex_element->variable_binding, -// next_vertex_element->variable_binding, conditions, -// alias_map, extra_alias_counter); -// break; -// } -// default: -// throw InternalException("Unknown match type found"); -// } -// } -// previous_vertex_element = next_vertex_element; -// previous_vertex_table = next_vertex_table; -// } -// } -// -// select_node->where_clause = CreateWhereClause(conditions); -// vector> substitute_column_list; -// for (auto &expression : column_list) { -// const auto &column_ref = -// dynamic_cast(expression.get()); -// if (column_ref == nullptr) { -// continue; -// } -// // If the table is referenced in this subquery 
(count() > 0) -// if (alias_map.count(column_ref->column_names[0])) { -// select_node->select_list.push_back(std::move(expression)); -// // Create a substitute -// unique_ptr new_upper_column_ref; -// if (column_ref->alias.empty()) { -// new_upper_column_ref = make_uniq( -// column_ref->column_names[1], named_subpath); -// } else { -// new_upper_column_ref = -// make_uniq(column_ref->alias, named_subpath); -// } -// new_upper_column_ref->alias = column_ref->alias; -// substitute_column_list.push_back(std::move(new_upper_column_ref)); -// } -// } -// // Remove the elements from the original column_list that are now NULL -// for (auto it = column_list.begin(); it != column_list.end();) { -// if (!*it) { -// it = column_list.erase(it); -// } else { -// ++it; -// } -// } -// // Add the ColumnRefs that were previously moved to the subquery with the -// // subquery name as table_name -// for (auto &expression : substitute_column_list) { -// column_list.push_back(std::move(expression)); -// } -// auto subquery = make_uniq(); -// subquery->node = std::move(select_node); -// -// return make_uniq(std::move(subquery), named_subpath); -//} + // + //unique_ptr PGQMatchFunction::GenerateSubpathPatternSubquery( + // unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, + // vector> &column_list, + // unordered_set &named_subpaths) { + // vector> conditions; + // auto path_element = + // reinterpret_cast(path_pattern->path_elements[0].get()); + // auto select_node = make_uniq(); + // unordered_map alias_map; + // string named_subpath = path_element->path_variable; + // named_subpaths.insert(named_subpath); + // int32_t extra_alias_counter = 0; + // bool path_finding = false; + // auto previous_vertex_element = + // GetPathElement(path_element->path_list[0], conditions); + // auto previous_vertex_table = + // FindGraphTable(previous_vertex_element->label, *pg_table); + // CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); + // 
alias_map[previous_vertex_element->variable_binding] = + // previous_vertex_table->table_name; + // for (idx_t idx_j = 1; idx_j < path_element->path_list.size(); + // idx_j = idx_j + 2) { + // PathElement *edge_element = + // GetPathElement(path_element->path_list[idx_j], conditions); + // PathElement *next_vertex_element = + // GetPathElement(path_element->path_list[idx_j + 1], conditions); + // if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + // previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + // throw BinderException("Vertex and edge patterns must be alternated."); + // } + // + // auto edge_table = FindGraphTable(edge_element->label, *pg_table); + // CheckInheritance(edge_table, edge_element, conditions); + // auto next_vertex_table = + // FindGraphTable(next_vertex_element->label, *pg_table); + // CheckInheritance(next_vertex_table, next_vertex_element, conditions); + // + // if (path_element->path_list[idx_j]->path_reference_type == + // PGQPathReferenceType::SUBPATH) { + // auto *subpath = + // reinterpret_cast(path_element->path_list[idx_j].get()); + // if (subpath->upper > 1) { + // path_finding = true; + // if (!named_subpath.empty() && path_pattern->shortest) { + // // todo(dtenwolde) does not necessarily have to be a shortest path + // // query if it is a named subpath. It can also be a basic pattern + // // matching that is named. + // auto shortest_path_function = CreatePathFindingFunction( + // previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, edge_table, + // "shortestpath"); + // shortest_path_function->alias = "path"; + // select_node->select_list.push_back(std::move(shortest_path_function)); + // } + // select_node->cte_map.map["cte1"] = + // CreateCSRCTE(edge_table, previous_vertex_element->variable_binding, + // edge_element->variable_binding, + // next_vertex_element->variable_binding); + // + // //! 
(SELECT count(cte1.temp) * 0 as temp from cte1) __x + // auto temp_cte_select_subquery = CreateCountCTESubquery(); + // + // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); + // + // //! src alias (FROM src a) + // auto src_vertex_ref = make_uniq(); + // src_vertex_ref->table_name = edge_table->source_reference; + // src_vertex_ref->alias = previous_vertex_element->variable_binding; + // + // cross_join_src_dst->left = std::move(src_vertex_ref); + // + // //! dst alias (FROM dst b) + // auto dst_vertex_ref = make_uniq(); + // dst_vertex_ref->table_name = edge_table->destination_reference; + // dst_vertex_ref->alias = next_vertex_element->variable_binding; + // + // cross_join_src_dst->right = std::move(dst_vertex_ref); + // + // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); + // cross_join_with_cte->left = std::move(temp_cte_select_subquery); + // cross_join_with_cte->right = std::move(cross_join_src_dst); + // + // if (select_node->from_table) { + // // create a cross join since there is already something in the from + // // clause + // auto from_join = make_uniq(JoinRefType::CROSS); + // from_join->left = std::move(select_node->from_table); + // from_join->right = std::move(cross_join_with_cte); + // select_node->from_table = std::move(from_join); + // } else { + // select_node->from_table = std::move(cross_join_with_cte); + // } + // //! END + // //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b + // + // //! START + // //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) from + // //! 
dst c, a.rowid, b.rowid) between lower and upper + // auto reachability_function = + // CreatePathFindingFunction(previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, + // edge_table, "iterativelength"); + // + // auto cte_col_ref = make_uniq("temp", "__x"); + // + // vector> addition_children; + // addition_children.push_back(std::move(cte_col_ref)); + // addition_children.push_back(std::move(reachability_function)); + // + // auto addition_function = + // make_uniq("add", std::move(addition_children)); + // auto lower_limit = + // make_uniq(Value::BIGINT(subpath->lower)); + // auto upper_limit = + // make_uniq(Value::BIGINT(subpath->upper)); + // auto between_expression = make_uniq( + // std::move(addition_function), std::move(lower_limit), + // std::move(upper_limit)); + // conditions.push_back(std::move(between_expression)); + // + // //! END + // //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) from + // //! src s, a.rowid, b.rowid) between lower and upper + // } + // // check aliases + // alias_map[next_vertex_element->variable_binding] = + // next_vertex_table->table_name; + // alias_map[edge_element->variable_binding] = edge_table->table_name; + // if (!path_finding) { + // switch (edge_element->match_type) { + // case PGQMatchType::MATCH_EDGE_ANY: { + // select_node->modifiers.push_back(make_uniq()); + // EdgeTypeAny(edge_table, edge_element->variable_binding, + // previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, conditions); + // break; + // } + // case PGQMatchType::MATCH_EDGE_LEFT: + // EdgeTypeLeft(edge_table, next_vertex_table->table_name, + // previous_vertex_table->table_name, + // edge_element->variable_binding, + // previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, conditions); + // break; + // case PGQMatchType::MATCH_EDGE_RIGHT: + // EdgeTypeRight(edge_table, next_vertex_table->table_name, + // previous_vertex_table->table_name, 
+ // edge_element->variable_binding, + // previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, conditions); + // break; + // case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + // EdgeTypeLeftRight(edge_table, edge_element->variable_binding, + // previous_vertex_element->variable_binding, + // next_vertex_element->variable_binding, conditions, + // alias_map, extra_alias_counter); + // break; + // } + // default: + // throw InternalException("Unknown match type found"); + // } + // } + // previous_vertex_element = next_vertex_element; + // previous_vertex_table = next_vertex_table; + // } + // } + // + // select_node->where_clause = CreateWhereClause(conditions); + // vector> substitute_column_list; + // for (auto &expression : column_list) { + // const auto &column_ref = + // dynamic_cast(expression.get()); + // if (column_ref == nullptr) { + // continue; + // } + // // If the table is referenced in this subquery (count() > 0) + // if (alias_map.count(column_ref->column_names[0])) { + // select_node->select_list.push_back(std::move(expression)); + // // Create a substitute + // unique_ptr new_upper_column_ref; + // if (column_ref->alias.empty()) { + // new_upper_column_ref = make_uniq( + // column_ref->column_names[1], named_subpath); + // } else { + // new_upper_column_ref = + // make_uniq(column_ref->alias, named_subpath); + // } + // new_upper_column_ref->alias = column_ref->alias; + // substitute_column_list.push_back(std::move(new_upper_column_ref)); + // } + // } + // // Remove the elements from the original column_list that are now NULL + // for (auto it = column_list.begin(); it != column_list.end();) { + // if (!*it) { + // it = column_list.erase(it); + // } else { + // ++it; + // } + // } + // // Add the ColumnRefs that were previously moved to the subquery with the + // // subquery name as table_name + // for (auto &expression : substitute_column_list) { + // column_list.push_back(std::move(expression)); + // } + // auto 
subquery = make_uniq(); + // subquery->node = std::move(select_node); + // + // return make_uniq(std::move(subquery), named_subpath); + //} } // namespace duckdb diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index 659913b8..ead1e2e6 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -122,8 +122,7 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, if (!duckpgq_parse_data) { throw BinderException("No DuckPGQ parse data found"); } - auto statement = - dynamic_cast(duckpgq_parse_data->statement.get()); + auto statement = duckpgq_parse_data->statement.get(); if (statement->type == StatementType::SELECT_STATEMENT) { auto select_statement = dynamic_cast(statement); auto select_node = dynamic_cast(select_statement->node.get()); @@ -137,13 +136,13 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, function->children.pop_back(); } throw Exception("use duckpgq_bind instead"); - } else if (statement->type == StatementType::CREATE_STATEMENT) { + } if (statement->type == StatementType::CREATE_STATEMENT) { ParserExtensionPlanResult result; result.function = CreatePropertyGraphFunction(); result.requires_valid_transaction = true; result.return_type = StatementReturnType::QUERY_RESULT; return result; - } else if (statement->type == StatementType::DROP_STATEMENT) { + } if (statement->type == StatementType::DROP_STATEMENT) { ParserExtensionPlanResult result; result.function = DropPropertyGraphFunction(); result.requires_valid_transaction = true; diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index aa21e9f2..0009e1eb 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,6 +49,33 @@ EDGE TABLES ( LABEL replyOf ); +query III +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person) + COLUMNS (o) + ) tmp + limit 10; +---- + + +query III +-FROM 
GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) + COLUMNS (p.id as p_id, p2.id as p2_id, t.id) + ) tmp + limit 10; +---- +14 32985348833329 3 +14 32985348833329 139 +14 30786325577731 196 +14 28587302322196 280 +14 28587302322180 294 +14 24189255811081 295 +14 28587302322196 448 +14 32985348833329 470 +14 28587302322196 540 +14 24189255811109 543 + query IIIII WITH CTE1 AS (SELECT CREATE_CSR_EDGE( 0, @@ -262,13 +289,7 @@ statement error ---- Parser Error: syntax error at or near "{" -query II --FROM GRAPH_TABLE (snb - MATCH o = ANY SHORTEST (p:Person)-[w:knows]-> {1,3}(p2:Person)-[i:hasInterest]->(t:Tag) - COLUMNS (p.id as p_id, p2.id as p2_id, t.id) - ) tmp - limit 10; ----- + statement ok -FROM GRAPH_TABLE (snb From a0d23b66ebd3a3522a17cf86274d332212c064cc Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 11:36:52 +0100 Subject: [PATCH 22/47] Started on returning the path in a named subpath --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index cc7e2e69..8d29abc6 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit cc7e2e6995606a06c856030a49504e68e9cbead9 +Subproject commit 8d29abc6b27b7c8cfa32a227db82df707eb1e481 From ebbfcfae0a5d0c3a0f93f6c619e161f4f93e47c4 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 11:36:59 +0100 Subject: [PATCH 23/47] Fix pointer may be null --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 81d8afe9..72516f30 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -513,6 +513,7 @@ namespace duckdb { // Set next vertex to be previous continue; } + edge_element = GetPathElement(edge_subpath->path_list[0]); } auto 
next_vertex_element = GetPathElement(path_list[idx_i+1]); From 2af2fa7db2b70e632ffc0290882d4257fd2f463d Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 14:54:20 +0100 Subject: [PATCH 24/47] Slightly longer patterns now seem to work --- .../functions/tablefunctions/match.cpp | 68 +++++++++++-------- test/sql/path-finding/complex_matching.test | 2 +- 2 files changed, 41 insertions(+), 29 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 72516f30..59edb009 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -485,7 +485,7 @@ namespace duckdb { } unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector> &path_list) { + vector>& path_list) { // This method will return a SubqueryRef of a list of rowids // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend // The difficulty is that there may be a (un)bounded path pattern at some point in the query @@ -497,14 +497,14 @@ namespace duckdb { auto previous_vertex_element = GetPathElement(path_list[0]); if (!previous_vertex_element) { // We hit a vertex element with a WHERE, but we only care about the rowid here - auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); } - for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i=idx_i+2) { + for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { auto edge_element = GetPathElement(path_list[idx_i]); if (!edge_element) { - auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); + auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); if (edge_subpath->upper > 1) { throw NotImplementedException("Returning the path list for (un)bounded paths has not yet 
been implemented."); // (un)bounded shortest path @@ -516,31 +516,34 @@ namespace duckdb { edge_element = GetPathElement(edge_subpath->path_list[0]); } - auto next_vertex_element = GetPathElement(path_list[idx_i+1]); + auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); if (!next_vertex_element) { - auto next_vertex_subpath = reinterpret_cast(path_list[idx_i+1].get()); + auto next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); } + auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); + auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); + auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); + auto starting_list_children = vector>(); if (!final_list) { - auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); - auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); - auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); - auto starting_list_children = vector>(); starting_list_children.push_back(std::move(previous_rowid)); - auto starting_list = make_uniq("list", std::move(starting_list_children)); - auto list_append_children = vector>(); - list_append_children.push_back(std::move(final_list)); - list_append_children.push_back(std::move(edge_rowid)); - auto list_append_ = make_uniq("list_append", std::move(list_append_children)); - - list_append_children = vector>(); - list_append_children.push_back(std::move(list_append_)); - list_append_children.push_back(std::move(next_rowid)); - final_list = make_uniq("list_append", std::move(list_append_children)); + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + final_list = make_uniq("list_value", std::move(starting_list_children)); + } else { + starting_list_children.push_back(std::move(edge_rowid)); + 
starting_list_children.push_back(std::move(next_rowid)); + auto next_elements_list = make_uniq("list_value", std::move(starting_list_children)); + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(next_elements_list)); + final_list = make_uniq("list_concat", std::move(final_list_children)); } } + + return final_list; // auto src_row_id = make_uniq("rowid", prev_binding); @@ -690,17 +693,24 @@ namespace duckdb { //! from src s, a.rowid, b.rowid) between lower and upper } - bool PGQMatchFunction::CheckNamedSubpath(string subpath_name, vector> &column_list) { - for (unique_ptr &column : column_list) { - ColumnRefExpression* column_ref = dynamic_cast(column.get()); + bool PGQMatchFunction::CheckNamedSubpath(string subpath_name, vector>& column_list) { + bool found = false; + idx_t idx_named_subpath = 0; + for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { + ColumnRefExpression* column_ref = dynamic_cast(column_list[idx_i].get()); if (column_ref == nullptr) { continue; } if (column_ref->column_names[0] == subpath_name) { - return true; + idx_named_subpath = idx_i; + found = true; + break; } } - return false; + if (found) { + column_list.erase(column_list.begin() + idx_named_subpath); + } + return found; } void PGQMatchFunction::ProcessPathList(vector>& path_list, @@ -722,7 +732,10 @@ namespace duckdb { } else { // Add the shortest path if the name is found in the column_list if (CheckNamedSubpath(previous_vertex_subpath->path_variable, column_list)) { - column_list.push_back(CreatePathFindingFunction(previous_vertex_subpath->path_list)); + auto path_finding_list_column = CreatePathFindingFunction(previous_vertex_subpath->path_list); + // TODO uncomment next line to get the column alias + // path_finding_list_column->alias = previous_vertex_subpath->path_variable; + column_list.push_back(std::move(path_finding_list_column)); } 
ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, alias_map, pg_table, extra_alias_counter, column_list); @@ -792,8 +805,7 @@ namespace duckdb { edge_element->variable_binding, next_vertex_element->variable_binding, edge_table, edge_subpath); - } - else { + } else { alias_map[edge_element->variable_binding] = edge_table->source_reference; AddEdgeJoins(select_node, edge_table, previous_vertex_table, next_vertex_table, edge_element->match_type, diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 0009e1eb..24c81d54 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -51,7 +51,7 @@ EDGE TABLES ( query III -FROM GRAPH_TABLE (snb - MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person) + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) COLUMNS (o) ) tmp limit 10; From 5e5e1e5b3fc145f0934c683d351cfebeda05ef2c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:23:56 +0100 Subject: [PATCH 25/47] Refactoring --- .../functions/tablefunctions/match.hpp | 7 +- .../functions/tablefunctions/match.cpp | 81 +++++++++++-------- test/sql/path-finding/complex_matching.test | 7 ++ 3 files changed, 60 insertions(+), 35 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 36fac4a0..8d7174fe 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #pragma once +#include + #include "duckdb/function/table_function.hpp" #include "duckdb/parser/parsed_data/create_property_graph_info.hpp" #include "duckdb/parser/path_element.hpp" @@ -104,7 +106,7 @@ namespace duckdb { unordered_set& named_subpaths); static unique_ptr - 
CreatePathFindingFunction(vector> &path_list); + CreatePathFindingFunction(vector> &path_list, CreatePropertyGraphInfo &pg_table); static void AddPathFinding(const unique_ptr& select_node, @@ -133,6 +135,7 @@ namespace duckdb { CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, vector>& column_list); - static bool CheckNamedSubpath(string subpath_name, vector>& column_list); + static bool CheckNamedSubpath(SubPath &subpath, vector>& column_list, + CreatePropertyGraphInfo &pg_table); }; } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 59edb009..1a4c9cba 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -485,7 +485,7 @@ namespace duckdb { } unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector>& path_list) { + vector>& path_list, CreatePropertyGraphInfo &pg_table) { // This method will return a SubqueryRef of a list of rowids // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend // The difficulty is that there may be a (un)bounded path pattern at some point in the query @@ -502,25 +502,54 @@ namespace duckdb { } for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { + auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + auto edge_element = GetPathElement(path_list[idx_i]); if (!edge_element) { auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); if (edge_subpath->upper > 1) { - throw NotImplementedException("Returning the path list for (un)bounded paths has not yet been implemented."); // (un)bounded shortest path // Add the shortest path UDF - + edge_element = 
GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + auto src_row_id = make_uniq("rowid", previous_vertex_element->variable_binding); + auto dst_row_id = make_uniq("rowid", next_vertex_element->variable_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back( + std::move(GetCountTable(edge_table, previous_vertex_element->variable_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto shortest_path_function = make_uniq("shortestpath", + std::move(pathfinding_children)); + + if (!final_list) { + final_list = std::move(shortest_path_function); + } else { + auto pop_front_shortest_path_children = vector>(); + pop_front_shortest_path_children.push_back(std::move(shortest_path_function)); + auto pop_front = make_uniq("array_pop_front", std::move(pop_front_shortest_path_children)); + + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(pop_front)); + final_list = make_uniq("list_concat", std::move(final_list_children)); + } // Set next vertex to be previous + previous_vertex_element = next_vertex_element; continue; } edge_element = GetPathElement(edge_subpath->path_list[0]); } - auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); - next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); - } + auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); @@ -540,25 +569,12 @@ namespace duckdb { 
final_list_children.push_back(std::move(next_elements_list)); final_list = make_uniq("list_concat", std::move(final_list_children)); } + previous_vertex_element = next_vertex_element; } - - return final_list; - // auto src_row_id = make_uniq("rowid", prev_binding); - // auto dst_row_id = make_uniq("rowid", next_binding); - // auto csr_id = make_uniq(Value::INTEGER(0)); - // - // vector> pathfinding_children; - // pathfinding_children.push_back(std::move(csr_id)); - // pathfinding_children.push_back( - // std::move(GetCountTable(edge_table, prev_binding))); - // pathfinding_children.push_back(std::move(src_row_id)); - // pathfinding_children.push_back(std::move(dst_row_id)); - // - // return make_uniq(path_finding_udf, - // std::move(pathfinding_children)); + } void PGQMatchFunction::AddEdgeJoins(const unique_ptr& select_node, @@ -693,15 +709,18 @@ namespace duckdb { //! from src s, a.rowid, b.rowid) between lower and upper } - bool PGQMatchFunction::CheckNamedSubpath(string subpath_name, vector>& column_list) { + bool PGQMatchFunction::CheckNamedSubpath(SubPath &subpath, vector>& column_list, + CreatePropertyGraphInfo &pg_table) { bool found = false; idx_t idx_named_subpath = 0; for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { - ColumnRefExpression* column_ref = dynamic_cast(column_list[idx_i].get()); - if (column_ref == nullptr) { + const FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); + if (parsed_ref == nullptr) { continue; } - if (column_ref->column_names[0] == subpath_name) { + if (parsed_ref->function_name == "element_id") { + // Check subpath name matches the function name + idx_named_subpath = idx_i; found = true; break; @@ -709,6 +728,7 @@ namespace duckdb { } if (found) { column_list.erase(column_list.begin() + idx_named_subpath); + column_list.push_back(CreatePathFindingFunction(subpath.path_list, pg_table)); } return found; } @@ -731,12 +751,7 @@ namespace duckdb { previous_vertex_element = 
GetPathElement(previous_vertex_subpath->path_list[0]); } else { // Add the shortest path if the name is found in the column_list - if (CheckNamedSubpath(previous_vertex_subpath->path_variable, column_list)) { - auto path_finding_list_column = CreatePathFindingFunction(previous_vertex_subpath->path_list); - // TODO uncomment next line to get the column alias - // path_finding_list_column->alias = previous_vertex_subpath->path_variable; - column_list.push_back(std::move(path_finding_list_column)); - } + CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, alias_map, pg_table, extra_alias_counter, column_list); return; diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 24c81d54..3d1f1889 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,6 +49,13 @@ EDGE TABLES ( LABEL replyOf ); +query III +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) + COLUMNS (element_id(o)) + ) tmp; +---- + query III -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) From e9324b9ffd333a9f1e1efa93e89cd938edf4bf9a Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:26:31 +0100 Subject: [PATCH 26/47] Format fix --- .../functions/tablefunctions/match.cpp | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 1a4c9cba..c2f41869 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -485,7 +485,7 @@ namespace duckdb { } unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector>& 
path_list, CreatePropertyGraphInfo &pg_table) { + vector>& path_list, CreatePropertyGraphInfo& pg_table) { // This method will return a SubqueryRef of a list of rowids // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend // The difficulty is that there may be a (un)bounded path pattern at some point in the query @@ -528,14 +528,15 @@ namespace duckdb { pathfinding_children.push_back(std::move(dst_row_id)); auto shortest_path_function = make_uniq("shortestpath", - std::move(pathfinding_children)); + std::move(pathfinding_children)); if (!final_list) { final_list = std::move(shortest_path_function); } else { auto pop_front_shortest_path_children = vector>(); pop_front_shortest_path_children.push_back(std::move(shortest_path_function)); - auto pop_front = make_uniq("array_pop_front", std::move(pop_front_shortest_path_children)); + auto pop_front = make_uniq("array_pop_front", + std::move(pop_front_shortest_path_children)); auto final_list_children = vector>(); final_list_children.push_back(std::move(final_list)); @@ -548,8 +549,6 @@ namespace duckdb { } edge_element = GetPathElement(edge_subpath->path_list[0]); } - - auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); @@ -573,8 +572,6 @@ namespace duckdb { } return final_list; - - } void PGQMatchFunction::AddEdgeJoins(const unique_ptr& select_node, @@ -709,8 +706,8 @@ namespace duckdb { //! 
from src s, a.rowid, b.rowid) between lower and upper } - bool PGQMatchFunction::CheckNamedSubpath(SubPath &subpath, vector>& column_list, - CreatePropertyGraphInfo &pg_table) { + bool PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, + CreatePropertyGraphInfo& pg_table) { bool found = false; idx_t idx_named_subpath = 0; for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { @@ -718,9 +715,8 @@ namespace duckdb { if (parsed_ref == nullptr) { continue; } - if (parsed_ref->function_name == "element_id") { + if (parsed_ref->function_name == "element_id" && parsed_ref->alias == subpath.path_variable) { // Check subpath name matches the function name - idx_named_subpath = idx_i; found = true; break; @@ -728,7 +724,8 @@ namespace duckdb { } if (found) { column_list.erase(column_list.begin() + idx_named_subpath); - column_list.push_back(CreatePathFindingFunction(subpath.path_list, pg_table)); + column_list.insert(column_list.begin() + idx_named_subpath, + CreatePathFindingFunction(subpath.path_list, pg_table)); } return found; } From 34f5dc3915eac7891c948dd3418b0121931da372 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:32:38 +0100 Subject: [PATCH 27/47] Add move --- .../duckpgq/functions/tablefunctions/match.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index c2f41869..1b1ec81c 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -715,17 +715,21 @@ namespace duckdb { if (parsed_ref == nullptr) { continue; } - if (parsed_ref->function_name == "element_id" && parsed_ref->alias == subpath.path_variable) { + if (parsed_ref->function_name == "element_id") { // Check subpath name matches the function name - idx_named_subpath = idx_i; - found = true; - break; + auto column_ref = 
dynamic_cast(parsed_ref->children[0].get()); + if (column_ref->column_names[0] == subpath.path_variable) { + idx_named_subpath = idx_i; + found = true; + break; + } } } if (found) { column_list.erase(column_list.begin() + idx_named_subpath); - column_list.insert(column_list.begin() + idx_named_subpath, - CreatePathFindingFunction(subpath.path_list, pg_table)); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + shortest_path_function->alias = subpath.path_variable; + column_list.insert(column_list.begin() + idx_named_subpath, std::move(shortest_path_function)); } return found; } From 1abbc2ff9f24fc3eb56636eee11a5b3586d700bd Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:56:28 +0100 Subject: [PATCH 28/47] Improved tests --- test/sql/path-finding/complex_matching.test | 43 ++++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 3d1f1889..b0d553c5 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -52,18 +52,57 @@ EDGE TABLES ( query III -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) - COLUMNS (element_id(o)) + COLUMNS (element_id(o), p4.id, p.id) ) tmp; ---- +[0, 0, 13, 42, 29, 68, 33] 14 10995116277782 +[0, 0, 13, 42, 29, 69, 36] 14 10995116277782 +[0, 0, 13, 43, 31, 71, 35] 14 10995116277782 +[0, 0, 13, 43, 31, 72, 40] 14 10995116277782 +[0, 0, 13, 43, 31, 73, 45] 14 10995116277782 +[0, 0, 13, 43, 31, 74, 46] 14 10995116277782 +[0, 0, 13, 44, 33, 77, 36] 14 10995116277782 +[0, 0, 13, 44, 33, 78, 38] 14 10995116277782 +[0, 0, 13, 44, 33, 79, 39] 14 10995116277782 +[0, 0, 13, 44, 33, 80, 43] 14 10995116277782 +[0, 0, 13, 45, 36, 82, 45] 14 10995116277782 +[0, 1, 26, 62, 31, 71, 35] 14 24189255811081 +[0, 1, 26, 62, 31, 72, 40] 
14 24189255811081 +[0, 1, 26, 62, 31, 73, 45] 14 24189255811081 +[0, 1, 26, 62, 31, 74, 46] 14 24189255811081 +[0, 1, 26, 63, 32, 75, 33] 14 24189255811081 +[0, 1, 26, 63, 32, 76, 36] 14 24189255811081 +[0, 1, 26, 64, 33, 77, 36] 14 24189255811081 +[0, 1, 26, 64, 33, 78, 38] 14 24189255811081 +[0, 1, 26, 64, 33, 79, 39] 14 24189255811081 +[0, 1, 26, 64, 33, 80, 43] 14 24189255811081 +[0, 1, 26, 63, 32, 76, 36, 82, 45] 14 24189255811081 +[0, 2, 32, 75, 33, 77, 36] 14 26388279066668 +[0, 2, 32, 75, 33, 78, 38] 14 26388279066668 +[0, 2, 32, 75, 33, 79, 39] 14 26388279066668 +[0, 2, 32, 75, 33, 80, 43] 14 26388279066668 +[0, 2, 32, 76, 36, 82, 45] 14 26388279066668 -query III +statement error -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) COLUMNS (o) ) tmp limit 10; ---- +Binder Error: Referenced column "o" not found in FROM clause! + +# https://github.com/cwida/duckpgq-extension/issues/68 +statement error +-FROM GRAPH_TABLE (snb + MATCH o = ANY SHORTEST (p:Person)-[w:knows]->(p2:Person)-[w2:knows]->(p3:Person) + COLUMNS (element_id(a)) + ) tmp + limit 10; +---- +Catalog Error: Scalar Function with name element_id does not exist! +Did you mean "element_at"? 
query III -FROM GRAPH_TABLE (snb From b5b06d82f3e379791ed666fdcc7e9a7f7d72d362 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:56:48 +0100 Subject: [PATCH 29/47] Format fix --- test/sql/path-finding/complex_matching.test | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index b0d553c5..997a2fb1 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -335,8 +335,6 @@ statement error ---- Parser Error: syntax error at or near "{" - - statement ok -FROM GRAPH_TABLE (snb MATCH p = ANY SHORTEST (a:Person where a.id = 28587302322180)-[k:knows]->{1,3}(b:Person) From 8860cf26d3420edaf3c887b3c52c5da45a3bed25 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:58:22 +0100 Subject: [PATCH 30/47] Comment --- duckpgq/src/duckpgq/functions/tablefunctions/match.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 1b1ec81c..73e753c8 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -716,7 +716,7 @@ namespace duckdb { continue; } if (parsed_ref->function_name == "element_id") { - // Check subpath name matches the function name + // Check subpath name matches the column referenced in the function --> element_id(named_subpath) auto column_ref = dynamic_cast(parsed_ref->children[0].get()); if (column_ref->column_names[0] == subpath.path_variable) { idx_named_subpath = idx_i; From 5a9b74dd8abe24ba7b6a358c75cbf6ebff77e8c5 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 16:59:09 +0100 Subject: [PATCH 31/47] Remove commented out code --- .../functions/tablefunctions/match.cpp | 213 ------------------ 1 file changed, 213 deletions(-) diff --git 
a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 73e753c8..941c346a 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -743,7 +743,6 @@ namespace duckdb { PathElement* previous_vertex_element = GetPathElement(path_list[0]); if (!previous_vertex_element) { - // todo(dtenwolde) handle named subpaths. const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); if (previous_vertex_subpath->where_clause) { conditions.push_back(std::move(previous_vertex_subpath->where_clause)); @@ -808,7 +807,6 @@ namespace duckdb { conditions.push_back(std::move(edge_subpath->where_clause)); } if (edge_subpath->path_list.size() > 1) { - // todo(dtenwolde) deal with multiple elements in subpath throw NotImplementedException("Subpath on an edge is not yet supported."); } edge_element = GetPathElement(edge_subpath->path_list[0]); @@ -960,215 +958,4 @@ namespace duckdb { return std::move(result); } - - // - //unique_ptr PGQMatchFunction::GenerateSubpathPatternSubquery( - // unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, - // vector> &column_list, - // unordered_set &named_subpaths) { - // vector> conditions; - // auto path_element = - // reinterpret_cast(path_pattern->path_elements[0].get()); - // auto select_node = make_uniq(); - // unordered_map alias_map; - // string named_subpath = path_element->path_variable; - // named_subpaths.insert(named_subpath); - // int32_t extra_alias_counter = 0; - // bool path_finding = false; - // auto previous_vertex_element = - // GetPathElement(path_element->path_list[0], conditions); - // auto previous_vertex_table = - // FindGraphTable(previous_vertex_element->label, *pg_table); - // CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); - // alias_map[previous_vertex_element->variable_binding] = - // previous_vertex_table->table_name; - // for (idx_t 
idx_j = 1; idx_j < path_element->path_list.size(); - // idx_j = idx_j + 2) { - // PathElement *edge_element = - // GetPathElement(path_element->path_list[idx_j], conditions); - // PathElement *next_vertex_element = - // GetPathElement(path_element->path_list[idx_j + 1], conditions); - // if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - // previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - // throw BinderException("Vertex and edge patterns must be alternated."); - // } - // - // auto edge_table = FindGraphTable(edge_element->label, *pg_table); - // CheckInheritance(edge_table, edge_element, conditions); - // auto next_vertex_table = - // FindGraphTable(next_vertex_element->label, *pg_table); - // CheckInheritance(next_vertex_table, next_vertex_element, conditions); - // - // if (path_element->path_list[idx_j]->path_reference_type == - // PGQPathReferenceType::SUBPATH) { - // auto *subpath = - // reinterpret_cast(path_element->path_list[idx_j].get()); - // if (subpath->upper > 1) { - // path_finding = true; - // if (!named_subpath.empty() && path_pattern->shortest) { - // // todo(dtenwolde) does not necessarily have to be a shortest path - // // query if it is a named subpath. It can also be a basic pattern - // // matching that is named. - // auto shortest_path_function = CreatePathFindingFunction( - // previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, edge_table, - // "shortestpath"); - // shortest_path_function->alias = "path"; - // select_node->select_list.push_back(std::move(shortest_path_function)); - // } - // select_node->cte_map.map["cte1"] = - // CreateCSRCTE(edge_table, previous_vertex_element->variable_binding, - // edge_element->variable_binding, - // next_vertex_element->variable_binding); - // - // //! 
(SELECT count(cte1.temp) * 0 as temp from cte1) __x - // auto temp_cte_select_subquery = CreateCountCTESubquery(); - // - // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); - // - // //! src alias (FROM src a) - // auto src_vertex_ref = make_uniq(); - // src_vertex_ref->table_name = edge_table->source_reference; - // src_vertex_ref->alias = previous_vertex_element->variable_binding; - // - // cross_join_src_dst->left = std::move(src_vertex_ref); - // - // //! dst alias (FROM dst b) - // auto dst_vertex_ref = make_uniq(); - // dst_vertex_ref->table_name = edge_table->destination_reference; - // dst_vertex_ref->alias = next_vertex_element->variable_binding; - // - // cross_join_src_dst->right = std::move(dst_vertex_ref); - // - // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - // cross_join_with_cte->left = std::move(temp_cte_select_subquery); - // cross_join_with_cte->right = std::move(cross_join_src_dst); - // - // if (select_node->from_table) { - // // create a cross join since there is already something in the from - // // clause - // auto from_join = make_uniq(JoinRefType::CROSS); - // from_join->left = std::move(select_node->from_table); - // from_join->right = std::move(cross_join_with_cte); - // select_node->from_table = std::move(from_join); - // } else { - // select_node->from_table = std::move(cross_join_with_cte); - // } - // //! END - // //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x, src a, dst b - // - // //! START - // //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) from - // //! 
dst c, a.rowid, b.rowid) between lower and upper - // auto reachability_function = - // CreatePathFindingFunction(previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, - // edge_table, "iterativelength"); - // - // auto cte_col_ref = make_uniq("temp", "__x"); - // - // vector> addition_children; - // addition_children.push_back(std::move(cte_col_ref)); - // addition_children.push_back(std::move(reachability_function)); - // - // auto addition_function = - // make_uniq("add", std::move(addition_children)); - // auto lower_limit = - // make_uniq(Value::BIGINT(subpath->lower)); - // auto upper_limit = - // make_uniq(Value::BIGINT(subpath->upper)); - // auto between_expression = make_uniq( - // std::move(addition_function), std::move(lower_limit), - // std::move(upper_limit)); - // conditions.push_back(std::move(between_expression)); - // - // //! END - // //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) from - // //! src s, a.rowid, b.rowid) between lower and upper - // } - // // check aliases - // alias_map[next_vertex_element->variable_binding] = - // next_vertex_table->table_name; - // alias_map[edge_element->variable_binding] = edge_table->table_name; - // if (!path_finding) { - // switch (edge_element->match_type) { - // case PGQMatchType::MATCH_EDGE_ANY: { - // select_node->modifiers.push_back(make_uniq()); - // EdgeTypeAny(edge_table, edge_element->variable_binding, - // previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, conditions); - // break; - // } - // case PGQMatchType::MATCH_EDGE_LEFT: - // EdgeTypeLeft(edge_table, next_vertex_table->table_name, - // previous_vertex_table->table_name, - // edge_element->variable_binding, - // previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, conditions); - // break; - // case PGQMatchType::MATCH_EDGE_RIGHT: - // EdgeTypeRight(edge_table, next_vertex_table->table_name, - // previous_vertex_table->table_name, 
- // edge_element->variable_binding, - // previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, conditions); - // break; - // case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - // EdgeTypeLeftRight(edge_table, edge_element->variable_binding, - // previous_vertex_element->variable_binding, - // next_vertex_element->variable_binding, conditions, - // alias_map, extra_alias_counter); - // break; - // } - // default: - // throw InternalException("Unknown match type found"); - // } - // } - // previous_vertex_element = next_vertex_element; - // previous_vertex_table = next_vertex_table; - // } - // } - // - // select_node->where_clause = CreateWhereClause(conditions); - // vector> substitute_column_list; - // for (auto &expression : column_list) { - // const auto &column_ref = - // dynamic_cast(expression.get()); - // if (column_ref == nullptr) { - // continue; - // } - // // If the table is referenced in this subquery (count() > 0) - // if (alias_map.count(column_ref->column_names[0])) { - // select_node->select_list.push_back(std::move(expression)); - // // Create a substitute - // unique_ptr new_upper_column_ref; - // if (column_ref->alias.empty()) { - // new_upper_column_ref = make_uniq( - // column_ref->column_names[1], named_subpath); - // } else { - // new_upper_column_ref = - // make_uniq(column_ref->alias, named_subpath); - // } - // new_upper_column_ref->alias = column_ref->alias; - // substitute_column_list.push_back(std::move(new_upper_column_ref)); - // } - // } - // // Remove the elements from the original column_list that are now NULL - // for (auto it = column_list.begin(); it != column_list.end();) { - // if (!*it) { - // it = column_list.erase(it); - // } else { - // ++it; - // } - // } - // // Add the ColumnRefs that were previously moved to the subquery with the - // // subquery name as table_name - // for (auto &expression : substitute_column_list) { - // column_list.push_back(std::move(expression)); - // } - // auto 
subquery = make_uniq(); - // subquery->node = std::move(select_node); - // - // return make_uniq(std::move(subquery), named_subpath); - //} } // namespace duckdb From 028103015db376196b3ea2e332a2cb18d47cc333 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Wed, 17 Jan 2024 17:07:07 +0100 Subject: [PATCH 32/47] Fix shortest path test --- test/sql/path-finding/shortest_path.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index b249eae4..81c7f5dd 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -47,7 +47,7 @@ query III -FROM GRAPH_TABLE (pg MATCH p = ANY SHORTEST (a:Person WHERE a.name = 'Daniel')-[k:knows]->{1,3}(b:Person) - COLUMNS (p, a.name as name, b.name as b_name) + COLUMNS (element_id(p), a.name as name, b.name as b_name) ) study; ---- [0, 0, 1] Daniel Tavneet From c8a5e0422f8fd430c18feb642a32f40492925262 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 11:03:20 +0100 Subject: [PATCH 33/47] Simplified logic --- .../functions/tablefunctions/match.cpp | 19 ++++--- test/sql/path-finding/complex_matching.test | 56 +++++++++---------- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 941c346a..cc97d034 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -709,7 +709,6 @@ namespace duckdb { bool PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, CreatePropertyGraphInfo& pg_table) { bool found = false; - idx_t idx_named_subpath = 0; for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { const FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); if (parsed_ref == nullptr) { @@ -719,18 +718,20 @@ namespace duckdb { // Check subpath name 
matches the column referenced in the function --> element_id(named_subpath) auto column_ref = dynamic_cast(parsed_ref->children[0].get()); if (column_ref->column_names[0] == subpath.path_variable) { - idx_named_subpath = idx_i; - found = true; + column_list.erase(column_list.begin() + idx_i); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + shortest_path_function->alias = subpath.path_variable; + column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); break; } } } - if (found) { - column_list.erase(column_list.begin() + idx_named_subpath); - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - shortest_path_function->alias = subpath.path_variable; - column_list.insert(column_list.begin() + idx_named_subpath, std::move(shortest_path_function)); - } + // if (found) { + // column_list.erase(column_list.begin() + idx_named_subpath); + // auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + // shortest_path_function->alias = subpath.path_variable; + // column_list.insert(column_list.begin() + idx_named_subpath, std::move(shortest_path_function)); + // } return found; } diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 997a2fb1..c7a42a2e 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -52,36 +52,36 @@ EDGE TABLES ( query III -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) - COLUMNS (element_id(o), p4.id, p.id) + COLUMNS (p3.id, element_id(o), p4.id, p.id) ) tmp; ---- -[0, 0, 13, 42, 29, 68, 33] 14 10995116277782 -[0, 0, 13, 42, 29, 69, 36] 14 10995116277782 -[0, 0, 13, 43, 31, 71, 35] 14 10995116277782 -[0, 0, 13, 43, 31, 72, 40] 14 10995116277782 -[0, 0, 13, 43, 31, 73, 45] 14 10995116277782 -[0, 0, 13, 43, 31, 
74, 46] 14 10995116277782 -[0, 0, 13, 44, 33, 77, 36] 14 10995116277782 -[0, 0, 13, 44, 33, 78, 38] 14 10995116277782 -[0, 0, 13, 44, 33, 79, 39] 14 10995116277782 -[0, 0, 13, 44, 33, 80, 43] 14 10995116277782 -[0, 0, 13, 45, 36, 82, 45] 14 10995116277782 -[0, 1, 26, 62, 31, 71, 35] 14 24189255811081 -[0, 1, 26, 62, 31, 72, 40] 14 24189255811081 -[0, 1, 26, 62, 31, 73, 45] 14 24189255811081 -[0, 1, 26, 62, 31, 74, 46] 14 24189255811081 -[0, 1, 26, 63, 32, 75, 33] 14 24189255811081 -[0, 1, 26, 63, 32, 76, 36] 14 24189255811081 -[0, 1, 26, 64, 33, 77, 36] 14 24189255811081 -[0, 1, 26, 64, 33, 78, 38] 14 24189255811081 -[0, 1, 26, 64, 33, 79, 39] 14 24189255811081 -[0, 1, 26, 64, 33, 80, 43] 14 24189255811081 -[0, 1, 26, 63, 32, 76, 36, 82, 45] 14 24189255811081 -[0, 2, 32, 75, 33, 77, 36] 14 26388279066668 -[0, 2, 32, 75, 33, 78, 38] 14 26388279066668 -[0, 2, 32, 75, 33, 79, 39] 14 26388279066668 -[0, 2, 32, 75, 33, 80, 43] 14 26388279066668 -[0, 2, 32, 76, 36, 82, 45] 14 26388279066668 +28587302322180 [0, 0, 13, 42, 29, 68, 33] 14 10995116277782 +28587302322204 [0, 0, 13, 42, 29, 69, 36] 14 10995116277782 +28587302322196 [0, 0, 13, 43, 31, 71, 35] 14 10995116277782 +30786325577740 [0, 0, 13, 43, 31, 72, 40] 14 10995116277782 +35184372088850 [0, 0, 13, 43, 31, 73, 45] 14 10995116277782 +35184372088856 [0, 0, 13, 43, 31, 74, 46] 14 10995116277782 +28587302322204 [0, 0, 13, 44, 33, 77, 36] 14 10995116277782 +28587302322223 [0, 0, 13, 44, 33, 78, 38] 14 10995116277782 +30786325577731 [0, 0, 13, 44, 33, 79, 39] 14 10995116277782 +32985348833329 [0, 0, 13, 44, 33, 80, 43] 14 10995116277782 +35184372088850 [0, 0, 13, 45, 36, 82, 45] 14 10995116277782 +28587302322196 [0, 1, 26, 62, 31, 71, 35] 14 24189255811081 +30786325577740 [0, 1, 26, 62, 31, 72, 40] 14 24189255811081 +35184372088850 [0, 1, 26, 62, 31, 73, 45] 14 24189255811081 +35184372088856 [0, 1, 26, 62, 31, 74, 46] 14 24189255811081 +28587302322180 [0, 1, 26, 63, 32, 75, 33] 14 24189255811081 +28587302322204 [0, 1, 
26, 63, 32, 76, 36] 14 24189255811081 +28587302322204 [0, 1, 26, 64, 33, 77, 36] 14 24189255811081 +28587302322223 [0, 1, 26, 64, 33, 78, 38] 14 24189255811081 +30786325577731 [0, 1, 26, 64, 33, 79, 39] 14 24189255811081 +32985348833329 [0, 1, 26, 64, 33, 80, 43] 14 24189255811081 +35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 14 24189255811081 +28587302322204 [0, 2, 32, 75, 33, 77, 36] 14 26388279066668 +28587302322223 [0, 2, 32, 75, 33, 78, 38] 14 26388279066668 +30786325577731 [0, 2, 32, 75, 33, 79, 39] 14 26388279066668 +32985348833329 [0, 2, 32, 75, 33, 80, 43] 14 26388279066668 +35184372088850 [0, 2, 32, 76, 36, 82, 45] 14 26388279066668 statement error -FROM GRAPH_TABLE (snb From 88f0235911c8e62e84d458ffdd17c0793d71ddae Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 12:07:47 +0100 Subject: [PATCH 34/47] Path length now works --- .../functions/tablefunctions/match.hpp | 2 +- .../functions/tablefunctions/match.cpp | 88 ++++++++----------- test/sql/path-finding/complex_matching.test | 58 ++++++------ 3 files changed, 69 insertions(+), 79 deletions(-) diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index 8d7174fe..c746d3d7 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -135,7 +135,7 @@ namespace duckdb { CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, vector>& column_list); - static bool CheckNamedSubpath(SubPath &subpath, vector>& column_list, + static void CheckNamedSubpath(SubPath &subpath, vector>& column_list, CreatePropertyGraphInfo &pg_table); }; } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index cc97d034..63f50014 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp 
@@ -625,30 +625,8 @@ namespace duckdb { edge_table, prev_binding, edge_binding, next_binding); - // auto cross_join_src_dst = make_uniq(JoinRefType::CROSS); - - //! src alias (FROM src a) - // auto src_vertex_ref = make_uniq(); - // src_vertex_ref->table_name = edge_table->source_reference; - // src_vertex_ref->alias = prev_binding; - - - // cross_join_src_dst->left = std::move(src_vertex_ref); - - //! dst alias (FROM dst b) - // auto dst_vertex_ref = make_uniq(); - // dst_vertex_ref->table_name = edge_table->destination_reference; - // dst_vertex_ref->alias = next_binding; - - // cross_join_src_dst->right = std::move(dst_vertex_ref); - - //! (SELECT count(cte1.temp) * 0 as temp from cte1) __x - // auto cross_join_with_cte = make_uniq(JoinRefType::CROSS); - // cross_join_with_cte->left = std::move(temp_cte_select_subquery); - // cross_join_with_cte->right = std::move(cross_join_src_dst); auto temp_cte_select_subquery = CreateCountCTESubquery(); - // from_clause = std::move(temp_cte_select_subquery); if (from_clause) { // create a cross join since there is already something in the @@ -706,33 +684,53 @@ namespace duckdb { //! 
from src s, a.rowid, b.rowid) between lower and upper } - bool PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, + void PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, CreatePropertyGraphInfo& pg_table) { - bool found = false; for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { - const FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); + FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); if (parsed_ref == nullptr) { continue; } + auto column_ref = dynamic_cast(parsed_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + // Trying to check parsed_ref->alias directly leads to a segfault + string column_alias = parsed_ref->alias; + + if (column_ref->column_names[0] != subpath.path_variable) { + continue; + } if (parsed_ref->function_name == "element_id") { // Check subpath name matches the column referenced in the function --> element_id(named_subpath) - auto column_ref = dynamic_cast(parsed_ref->children[0].get()); - if (column_ref->column_names[0] == subpath.path_variable) { - column_list.erase(column_list.begin() + idx_i); - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - shortest_path_function->alias = subpath.path_variable; - column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); - break; + column_list.erase(column_list.begin() + idx_i); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + + if (column_alias.empty()) { + shortest_path_function->alias = "element_id(" + subpath.path_variable + ")"; + } else { + shortest_path_function->alias = column_alias; } - } + // shortest_path_function->alias = parsed_ref->alias == "" ? 
"element_id(" + subpath.path_variable + ")" : parsed_ref->alias; + column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); + } else if (parsed_ref->function_name == "path_length") { + column_list.erase(column_list.begin() + idx_i); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + auto path_len_children = vector>(); + path_len_children.push_back(std::move(shortest_path_function)); + auto path_len = + make_uniq("len", std::move(path_len_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto path_length_function = + make_uniq("//", std::move(div_children)); + path_length_function->alias = column_alias == "" ? "path_length(" + subpath.path_variable + ")" : column_alias; + + column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); + } else if (parsed_ref->function_name == "vertices") {} } - // if (found) { - // column_list.erase(column_list.begin() + idx_named_subpath); - // auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - // shortest_path_function->alias = subpath.path_variable; - // column_list.insert(column_list.begin() + idx_named_subpath, std::move(shortest_path_function)); - // } - return found; } void PGQMatchFunction::ProcessPathList(vector>& path_list, @@ -745,6 +743,7 @@ namespace duckdb { GetPathElement(path_list[0]); if (!previous_vertex_element) { const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); + CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); if (previous_vertex_subpath->where_clause) { conditions.push_back(std::move(previous_vertex_subpath->where_clause)); } @@ -752,7 +751,6 @@ namespace duckdb { previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); } else { // Add the shortest path if the name is found in the 
column_list - CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, alias_map, pg_table, extra_alias_counter, column_list); return; @@ -776,14 +774,6 @@ namespace duckdb { if (next_vertex_subpath->path_list.size() > 1) { throw NotImplementedException("Recursive patterns are not yet supported."); } - - // Check the size of the subpath path list - // if size == 1: - // Path Element with a WHERE - // (){3} Repeated vertices are not supported - // Else: - // Unsure if this is possible to reach. Perhaps at some point with a nested pattern? - // Will be unsupported for now if (next_vertex_subpath->where_clause) { conditions.push_back(std::move(next_vertex_subpath->where_clause)); } diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index c7a42a2e..373c7b97 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,39 +49,39 @@ EDGE TABLES ( LABEL replyOf ); -query III +query IIII -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) - COLUMNS (p3.id, element_id(o), p4.id, p.id) + COLUMNS (p3.id, element_id(o), path_length(o), p4.id, p.id) ) tmp; ---- -28587302322180 [0, 0, 13, 42, 29, 68, 33] 14 10995116277782 -28587302322204 [0, 0, 13, 42, 29, 69, 36] 14 10995116277782 -28587302322196 [0, 0, 13, 43, 31, 71, 35] 14 10995116277782 -30786325577740 [0, 0, 13, 43, 31, 72, 40] 14 10995116277782 -35184372088850 [0, 0, 13, 43, 31, 73, 45] 14 10995116277782 -35184372088856 [0, 0, 13, 43, 31, 74, 46] 14 10995116277782 -28587302322204 [0, 0, 13, 44, 33, 77, 36] 14 10995116277782 -28587302322223 [0, 0, 13, 44, 33, 78, 38] 14 10995116277782 -30786325577731 [0, 0, 13, 44, 33, 79, 39] 14 10995116277782 -32985348833329 [0, 0, 13, 44, 33, 80, 43] 14 10995116277782 -35184372088850 [0, 
0, 13, 45, 36, 82, 45] 14 10995116277782 -28587302322196 [0, 1, 26, 62, 31, 71, 35] 14 24189255811081 -30786325577740 [0, 1, 26, 62, 31, 72, 40] 14 24189255811081 -35184372088850 [0, 1, 26, 62, 31, 73, 45] 14 24189255811081 -35184372088856 [0, 1, 26, 62, 31, 74, 46] 14 24189255811081 -28587302322180 [0, 1, 26, 63, 32, 75, 33] 14 24189255811081 -28587302322204 [0, 1, 26, 63, 32, 76, 36] 14 24189255811081 -28587302322204 [0, 1, 26, 64, 33, 77, 36] 14 24189255811081 -28587302322223 [0, 1, 26, 64, 33, 78, 38] 14 24189255811081 -30786325577731 [0, 1, 26, 64, 33, 79, 39] 14 24189255811081 -32985348833329 [0, 1, 26, 64, 33, 80, 43] 14 24189255811081 -35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 14 24189255811081 -28587302322204 [0, 2, 32, 75, 33, 77, 36] 14 26388279066668 -28587302322223 [0, 2, 32, 75, 33, 78, 38] 14 26388279066668 -30786325577731 [0, 2, 32, 75, 33, 79, 39] 14 26388279066668 -32985348833329 [0, 2, 32, 75, 33, 80, 43] 14 26388279066668 -35184372088850 [0, 2, 32, 76, 36, 82, 45] 14 26388279066668 +28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 14 10995116277782 +28587302322204 [0, 0, 13, 42, 29, 69, 36] 3 14 10995116277782 +28587302322196 [0, 0, 13, 43, 31, 71, 35] 3 14 10995116277782 +30786325577740 [0, 0, 13, 43, 31, 72, 40] 3 14 10995116277782 +35184372088850 [0, 0, 13, 43, 31, 73, 45] 3 14 10995116277782 +35184372088856 [0, 0, 13, 43, 31, 74, 46] 3 14 10995116277782 +28587302322204 [0, 0, 13, 44, 33, 77, 36] 3 14 10995116277782 +28587302322223 [0, 0, 13, 44, 33, 78, 38] 3 14 10995116277782 +30786325577731 [0, 0, 13, 44, 33, 79, 39] 3 14 10995116277782 +32985348833329 [0, 0, 13, 44, 33, 80, 43] 3 14 10995116277782 +35184372088850 [0, 0, 13, 45, 36, 82, 45] 3 14 10995116277782 +28587302322196 [0, 1, 26, 62, 31, 71, 35] 3 14 24189255811081 +30786325577740 [0, 1, 26, 62, 31, 72, 40] 3 14 24189255811081 +35184372088850 [0, 1, 26, 62, 31, 73, 45] 3 14 24189255811081 +35184372088856 [0, 1, 26, 62, 31, 74, 46] 3 14 24189255811081 +28587302322180 [0, 1, 26, 
63, 32, 75, 33] 3 14 24189255811081 +28587302322204 [0, 1, 26, 63, 32, 76, 36] 3 14 24189255811081 +28587302322204 [0, 1, 26, 64, 33, 77, 36] 3 14 24189255811081 +28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 14 24189255811081 +30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 14 24189255811081 +32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 14 24189255811081 +35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 14 24189255811081 +28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 14 26388279066668 +28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 14 26388279066668 +30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 14 26388279066668 +32985348833329 [0, 2, 32, 75, 33, 80, 43] 3 14 26388279066668 +35184372088850 [0, 2, 32, 76, 36, 82, 45] 3 14 26388279066668 statement error -FROM GRAPH_TABLE (snb From c7698b8d077110e6ed90864be188d8640e04ee13 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 12:16:23 +0100 Subject: [PATCH 35/47] Adding vertices function --- .../functions/tablefunctions/match.cpp | 20 +++++++++++++++++-- test/sql/path-finding/complex_matching.test | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 63f50014..7dde9be6 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -726,10 +726,26 @@ namespace duckdb { div_children.push_back(std::move(constant_two)); auto path_length_function = make_uniq("//", std::move(div_children)); - path_length_function->alias = column_alias == "" ? "path_length(" + subpath.path_variable + ")" : column_alias; + path_length_function->alias = column_alias.empty() ? 
"path_length(" + subpath.path_variable + ")" : column_alias; column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); - } else if (parsed_ref->function_name == "vertices") {} + } else if (parsed_ref->function_name == "vertices") { + column_list.erase(column_list.begin() + idx_i); + auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); + auto list_slice_children = vector>(); + auto slice_begin = make_uniq(Value::INTEGER(2)); + auto slice_end = make_uniq(Value::INTEGER(-1)); + auto slice_step = make_uniq(Value::INTEGER(2)); + + list_slice_children.push_back(std::move(shortest_path_function)); + list_slice_children.push_back(std::move(slice_begin)); + list_slice_children.push_back(std::move(slice_end)); + list_slice_children.push_back(std::move(slice_step)); + auto list_slice = + make_uniq("list_slice", std::move(list_slice_children)); + list_slice->alias = column_alias.empty() ? "vertices(" + subpath.path_variable + ")" : column_alias; + column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); + } } } diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 373c7b97..d705f057 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -52,7 +52,7 @@ EDGE TABLES ( query IIII -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) - COLUMNS (p3.id, element_id(o), path_length(o), p4.id, p.id) + COLUMNS (p3.id, element_id(o), path_length(o), vertices(o), p4.id, p.id) ) tmp; ---- 28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 14 10995116277782 From d87c9d5ff11b31b4c6706d438975e025d0172c99 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 12:28:33 +0100 Subject: [PATCH 36/47] Adding vertices function --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq 
b/duckdb-pgq index 8d29abc6..82b4945b 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit 8d29abc6b27b7c8cfa32a227db82df707eb1e481 +Subproject commit 82b4945bf6ab44b15db8b95a7cac247f6842feb1 From 47a10d4a24deaa04810035e0ab3acebb78fd082a Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 12:28:39 +0100 Subject: [PATCH 37/47] Adding edges test --- .../functions/tablefunctions/match.cpp | 24 +++++--- test/sql/path-finding/complex_matching.test | 58 +++++++++---------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 7dde9be6..860f1f79 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -703,7 +703,6 @@ namespace duckdb { } if (parsed_ref->function_name == "element_id") { // Check subpath name matches the column referenced in the function --> element_id(named_subpath) - column_list.erase(column_list.begin() + idx_i); auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); if (column_alias.empty()) { @@ -711,10 +710,9 @@ namespace duckdb { } else { shortest_path_function->alias = column_alias; } - // shortest_path_function->alias = parsed_ref->alias == "" ? 
"element_id(" + subpath.path_variable + ")" : parsed_ref->alias; + column_list.erase(column_list.begin() + idx_i); column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); } else if (parsed_ref->function_name == "path_length") { - column_list.erase(column_list.begin() + idx_i); auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); auto path_len_children = vector>(); path_len_children.push_back(std::move(shortest_path_function)); @@ -727,23 +725,31 @@ namespace duckdb { auto path_length_function = make_uniq("//", std::move(div_children)); path_length_function->alias = column_alias.empty() ? "path_length(" + subpath.path_variable + ")" : column_alias; - - column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); - } else if (parsed_ref->function_name == "vertices") { column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); + } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); auto list_slice_children = vector>(); - auto slice_begin = make_uniq(Value::INTEGER(2)); + list_slice_children.push_back(std::move(shortest_path_function)); + + if (parsed_ref->function_name == "vertices") { + list_slice_children.push_back(make_uniq(Value::INTEGER(1))); + } else { + list_slice_children.push_back(make_uniq(Value::INTEGER(2))); + } auto slice_end = make_uniq(Value::INTEGER(-1)); auto slice_step = make_uniq(Value::INTEGER(2)); - list_slice_children.push_back(std::move(shortest_path_function)); - list_slice_children.push_back(std::move(slice_begin)); list_slice_children.push_back(std::move(slice_end)); list_slice_children.push_back(std::move(slice_step)); auto list_slice = make_uniq("list_slice", std::move(list_slice_children)); + if (parsed_ref->function_name == "vertices") { list_slice->alias = 
column_alias.empty() ? "vertices(" + subpath.path_variable + ")" : column_alias; + } else { + list_slice->alias = column_alias.empty() ? "edges(" + subpath.path_variable + ")" : column_alias; + } + column_list.erase(column_list.begin() + idx_i); column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); } } diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index d705f057..5a953b48 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -49,39 +49,39 @@ EDGE TABLES ( LABEL replyOf ); -query IIII +query IIIIIII -FROM GRAPH_TABLE (snb MATCH o = ANY SHORTEST (p4:Person where p4.rowid = 0)-[w3:knows]->(p:Person)-[w:knows]->{1,3}(p2:Person)-[w2:knows]->(p3:Person) - COLUMNS (p3.id, element_id(o), path_length(o), vertices(o), p4.id, p.id) + COLUMNS (p3.id, element_id(o), path_length(o), vertices(o), edges(o), p4.id, p.id) ) tmp; ---- -28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 14 10995116277782 -28587302322204 [0, 0, 13, 42, 29, 69, 36] 3 14 10995116277782 -28587302322196 [0, 0, 13, 43, 31, 71, 35] 3 14 10995116277782 -30786325577740 [0, 0, 13, 43, 31, 72, 40] 3 14 10995116277782 -35184372088850 [0, 0, 13, 43, 31, 73, 45] 3 14 10995116277782 -35184372088856 [0, 0, 13, 43, 31, 74, 46] 3 14 10995116277782 -28587302322204 [0, 0, 13, 44, 33, 77, 36] 3 14 10995116277782 -28587302322223 [0, 0, 13, 44, 33, 78, 38] 3 14 10995116277782 -30786325577731 [0, 0, 13, 44, 33, 79, 39] 3 14 10995116277782 -32985348833329 [0, 0, 13, 44, 33, 80, 43] 3 14 10995116277782 -35184372088850 [0, 0, 13, 45, 36, 82, 45] 3 14 10995116277782 -28587302322196 [0, 1, 26, 62, 31, 71, 35] 3 14 24189255811081 -30786325577740 [0, 1, 26, 62, 31, 72, 40] 3 14 24189255811081 -35184372088850 [0, 1, 26, 62, 31, 73, 45] 3 14 24189255811081 -35184372088856 [0, 1, 26, 62, 31, 74, 46] 3 14 24189255811081 -28587302322180 [0, 1, 26, 63, 32, 75, 33] 3 14 24189255811081 -28587302322204 [0, 1, 26, 63, 
32, 76, 36] 3 14 24189255811081 -28587302322204 [0, 1, 26, 64, 33, 77, 36] 3 14 24189255811081 -28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 14 24189255811081 -30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 14 24189255811081 -32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 14 24189255811081 -35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 14 24189255811081 -28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 14 26388279066668 -28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 14 26388279066668 -30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 14 26388279066668 -32985348833329 [0, 2, 32, 75, 33, 80, 43] 3 14 26388279066668 -35184372088850 [0, 2, 32, 76, 36, 82, 45] 3 14 26388279066668 +28587302322180 [0, 0, 13, 42, 29, 68, 33] 3 [0, 13, 29, 33] [0, 42, 68] 14 10995116277782 +28587302322204 [0, 0, 13, 42, 29, 69, 36] 3 [0, 13, 29, 36] [0, 42, 69] 14 10995116277782 +28587302322196 [0, 0, 13, 43, 31, 71, 35] 3 [0, 13, 31, 35] [0, 43, 71] 14 10995116277782 +30786325577740 [0, 0, 13, 43, 31, 72, 40] 3 [0, 13, 31, 40] [0, 43, 72] 14 10995116277782 +35184372088850 [0, 0, 13, 43, 31, 73, 45] 3 [0, 13, 31, 45] [0, 43, 73] 14 10995116277782 +35184372088856 [0, 0, 13, 43, 31, 74, 46] 3 [0, 13, 31, 46] [0, 43, 74] 14 10995116277782 +28587302322204 [0, 0, 13, 44, 33, 77, 36] 3 [0, 13, 33, 36] [0, 44, 77] 14 10995116277782 +28587302322223 [0, 0, 13, 44, 33, 78, 38] 3 [0, 13, 33, 38] [0, 44, 78] 14 10995116277782 +30786325577731 [0, 0, 13, 44, 33, 79, 39] 3 [0, 13, 33, 39] [0, 44, 79] 14 10995116277782 +32985348833329 [0, 0, 13, 44, 33, 80, 43] 3 [0, 13, 33, 43] [0, 44, 80] 14 10995116277782 +35184372088850 [0, 0, 13, 45, 36, 82, 45] 3 [0, 13, 36, 45] [0, 45, 82] 14 10995116277782 +28587302322196 [0, 1, 26, 62, 31, 71, 35] 3 [0, 26, 31, 35] [1, 62, 71] 14 24189255811081 +30786325577740 [0, 1, 26, 62, 31, 72, 40] 3 [0, 26, 31, 40] [1, 62, 72] 14 24189255811081 +35184372088850 [0, 1, 26, 62, 31, 73, 45] 3 [0, 26, 31, 45] [1, 62, 73] 14 24189255811081 +35184372088856 [0, 1, 26, 62, 31, 74, 46] 3 [0, 26, 
31, 46] [1, 62, 74] 14 24189255811081 +28587302322180 [0, 1, 26, 63, 32, 75, 33] 3 [0, 26, 32, 33] [1, 63, 75] 14 24189255811081 +28587302322204 [0, 1, 26, 63, 32, 76, 36] 3 [0, 26, 32, 36] [1, 63, 76] 14 24189255811081 +28587302322204 [0, 1, 26, 64, 33, 77, 36] 3 [0, 26, 33, 36] [1, 64, 77] 14 24189255811081 +28587302322223 [0, 1, 26, 64, 33, 78, 38] 3 [0, 26, 33, 38] [1, 64, 78] 14 24189255811081 +30786325577731 [0, 1, 26, 64, 33, 79, 39] 3 [0, 26, 33, 39] [1, 64, 79] 14 24189255811081 +32985348833329 [0, 1, 26, 64, 33, 80, 43] 3 [0, 26, 33, 43] [1, 64, 80] 14 24189255811081 +35184372088850 [0, 1, 26, 63, 32, 76, 36, 82, 45] 4 [0, 26, 32, 36, 45] [1, 63, 76, 82] 14 24189255811081 +28587302322204 [0, 2, 32, 75, 33, 77, 36] 3 [0, 32, 33, 36] [2, 75, 77] 14 26388279066668 +28587302322223 [0, 2, 32, 75, 33, 78, 38] 3 [0, 32, 33, 38] [2, 75, 78] 14 26388279066668 +30786325577731 [0, 2, 32, 75, 33, 79, 39] 3 [0, 32, 33, 39] [2, 75, 79] 14 26388279066668 +32985348833329 [0, 2, 32, 75, 33, 80, 43] 3 [0, 32, 33, 43] [2, 75, 80] 14 26388279066668 +35184372088850 [0, 2, 32, 76, 36, 82, 45] 3 [0, 32, 36, 45] [2, 76, 82] 14 26388279066668 statement error -FROM GRAPH_TABLE (snb From 68d5e2e65b844d0b852c0bdee50d42f7830885da Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 13:45:25 +0100 Subject: [PATCH 38/47] Fix tests --- test/sql/path-finding/complex_matching.test | 33 +++++++++++++++++++-- test/sql/path-finding/shortest_path.test | 2 +- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/test/sql/path-finding/complex_matching.test b/test/sql/path-finding/complex_matching.test index 5a953b48..9798916b 100644 --- a/test/sql/path-finding/complex_matching.test +++ b/test/sql/path-finding/complex_matching.test @@ -335,8 +335,35 @@ statement error ---- Parser Error: syntax error at or near "{" -statement ok +query III -FROM GRAPH_TABLE (snb - MATCH p = ANY SHORTEST (a:Person where a.id = 28587302322180)-[k:knows]->{1,3}(b:Person) - COLUMNS (a.id) + MATCH 
p = (a:Person where a.id = 16)-[k:knows]->{1,3}(b:Person) + COLUMNS (element_id(p), a.id, b.id) ) tmp; +---- +[1, 3, 5] 16 2199023255594 +[1, 3, 5, 16, 10] 16 8796093022244 +[1, 3, 5, 17, 12] 16 10995116277761 +[1, 3, 5, 18, 16] 16 13194139533342 +[1, 3, 5, 19, 17] 16 13194139533352 +[1, 3, 5, 16, 10, 32, 18] 16 13194139533355 +[1, 3, 5, 20, 19] 16 15393162788877 +[1, 3, 5, 17, 12, 39, 20] 16 17592186044443 +[1, 3, 5, 21, 21] 16 17592186044461 +[1, 3, 5, 19, 17, 48, 23] 16 19791209299987 +[1, 3, 5, 22, 26] 16 24189255811081 +[1, 3, 5, 22, 26, 61, 27] 16 24189255811109 +[1, 3, 5, 19, 17, 49, 29] 16 26388279066641 +[1, 4, 30] 16 26388279066655 +[1, 3, 5, 23, 31] 16 26388279066658 +[1, 3, 5, 24, 32] 16 26388279066668 +[1, 5, 33] 16 28587302322180 +[1, 3, 5, 26, 35] 16 28587302322196 +[1, 6, 36] 16 28587302322204 +[1, 5, 33, 78, 38] 16 28587302322223 +[1, 5, 33, 79, 39] 16 30786325577731 +[1, 3, 5, 27, 40] 16 30786325577740 +[1, 5, 33, 80, 43] 16 32985348833329 +[1, 3, 5, 22, 26, 66, 44] 16 35184372088834 +[1, 3, 5, 28, 45] 16 35184372088850 +[1, 3, 5, 23, 31, 74, 46] 16 35184372088856 diff --git a/test/sql/path-finding/shortest_path.test b/test/sql/path-finding/shortest_path.test index 81c7f5dd..a82e8ec1 100644 --- a/test/sql/path-finding/shortest_path.test +++ b/test/sql/path-finding/shortest_path.test @@ -58,7 +58,7 @@ query IIII -FROM GRAPH_TABLE (pg MATCH p = ANY SHORTEST (a:Person)-[k:knows]->{1,3}(b:Person) - COLUMNS (path_length(p), p, a.name as name, b.name as b_name) + COLUMNS (path_length(p), element_id(p), a.name as name, b.name as b_name) ) study order by study.name, study.b_name; ---- From d0c0b494f7fe85f97f28c0b0777a7ce6d153968c Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 13:46:29 +0100 Subject: [PATCH 39/47] Fix tests --- test/sql/snb/snb.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sql/snb/snb.test b/test/sql/snb/snb.test index de45a93e..1da20383 100644 --- a/test/sql/snb/snb.test +++ 
b/test/sql/snb/snb.test @@ -162,8 +162,8 @@ query IIIIII COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id, replyAuthor.firstName, replyAuthor.lastName) ) tmp; ---- -962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev 962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev +962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev # IS7. Replies of a message #set messageId 618475290624 From 0d1320321cc4676a8c8b800f861a05c4865f3223 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 13:49:06 +0100 Subject: [PATCH 40/47] Adding order by --- test/sql/snb/snb.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/sql/snb/snb.test b/test/sql/snb/snb.test index 1da20383..b165e374 100644 --- a/test/sql/snb/snb.test +++ b/test/sql/snb/snb.test @@ -160,7 +160,8 @@ query IIIIII MATCH (replyAuthor:person)<-[au2:hasAuthor]-(c:message where c.ParentMessageId is not null)-[r:replyOf]->(m:message where m.id = 618475290624)-[au:hasAuthor]->(messageAuthor:person), (replyAuthor:person)-[k:knows]-(messageAuthor:person) COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id, replyAuthor.firstName, replyAuthor.lastName) - ) tmp; + ) tmp + ORDER BY c.id; ---- 962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev 962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev From fc0296cac814afb189004752b83c2e71b1e3dbda Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 13:57:45 +0100 Subject: [PATCH 41/47] Changed order by --- test/sql/scalar/get_csr_w_type.test | 2 -- test/sql/snb/snb.test | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test/sql/scalar/get_csr_w_type.test b/test/sql/scalar/get_csr_w_type.test index 47880d84..7fbc514d 100644 --- a/test/sql/scalar/get_csr_w_type.test +++ b/test/sql/scalar/get_csr_w_type.test @@ -1,8 +1,6 @@ # name: test/sql/sqlpgq/get_csr_w_type.test # group: [sqlpgq] - - require duckpgq statement 
ok diff --git a/test/sql/snb/snb.test b/test/sql/snb/snb.test index b165e374..f04f2cba 100644 --- a/test/sql/snb/snb.test +++ b/test/sql/snb/snb.test @@ -161,10 +161,10 @@ query IIIIII (replyAuthor:person)-[k:knows]-(messageAuthor:person) COLUMNS (c.id,c.content,c.creationDate, replyAuthor.id, replyAuthor.firstName, replyAuthor.lastName) ) tmp - ORDER BY c.id; + ORDER BY tmp.content; ---- -962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev 962072674306 thanks 2012-07-08 20:32:03.239+00 24189255811081 Alim Guliyev +962072674305 yes 2012-07-08 23:48:41.63+00 24189255811081 Alim Guliyev # IS7. Replies of a message #set messageId 618475290624 From abdb35b4231ae6cbdd1b65ad8c23a673ca1c3eaf Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:00:21 +0100 Subject: [PATCH 42/47] Fix small bug with csr ptr --- duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp index dfdbcd67..d9ef7a7f 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp @@ -59,7 +59,7 @@ static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_ } auto duckpgq_state = reinterpret_cast(duckpgq_state_entry->second.get()); - auto csr_id = data_p.bind_data->Cast().csr_id; + auto csr_id = data_p.bind_data->Cast().csr_id; CSR *csr = duckpgq_state->GetCSR(csr_id); output.SetCardinality(5); output.data[0].SetVectorType(VectorType::FLAT_VECTOR); From e9305862e119c640a72a9ca9f4489c176325e718 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:04:33 +0100 Subject: [PATCH 43/47] Format fix --- CMakeLists.txt | 33 +- Makefile | 2 +- .../functions/tablefunctions/match.hpp | 249 +-- .../functions/tablefunctions/pgq_scan.hpp | 4 +- .../tablefunctions/create_property_graph.cpp | 6 +- 
.../functions/tablefunctions/match.cpp | 1928 +++++++++-------- .../functions/tablefunctions/pgq_scan.cpp | 9 +- duckpgq/src/duckpgq_extension.cpp | 28 +- 8 files changed, 1150 insertions(+), 1109 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48cc12d4..97e93588 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,28 +1,25 @@ cmake_minimum_required(VERSION 2.8.12) -# Set extension name here -#<<<<<<< HEAD -#set(TARGET_NAME duckpgq) +# Set extension name here <<<<<<< HEAD set(TARGET_NAME duckpgq) # -#set(EXTENSION_NAME ${TARGET_NAME}_extension) -#project(${TARGET_NAME}) -#set(CMAKE_CXX_STANDARD 11) +# set(EXTENSION_NAME ${TARGET_NAME}_extension) project(${TARGET_NAME}) +# set(CMAKE_CXX_STANDARD 11) # -#include_directories(duckpgq/include) -#add_subdirectory(duckpgq/src) +# include_directories(duckpgq/include) add_subdirectory(duckpgq/src) # -#include_directories(../duckdb-pgq/third_party/libpg_query/include) +# include_directories(../duckdb-pgq/third_party/libpg_query/include) # -#add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) +# add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) # -#set(PARAMETERS "-warnings") -#build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES}) -#======= +# set(PARAMETERS "-warnings") build_loadable_extension(${TARGET_NAME} +# ${PARAMETERS} ${EXTENSION_SOURCES}) +# ======= set(TARGET_NAME duckpgq) set(CMAKE_CXX_STANDARD 11) -# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then -# used in cmake with find_package. Feel free to remove or replace with other dependencies. -# Note that it should also be removed from vcpkg.json to prevent needlessly installing it.. +# DuckDB's extension distribution supports vcpkg. As such, dependencies can be +# added in ./vcpkg.json and then used in cmake with find_package. Feel free to +# remove or replace with other dependencies. 
Note that it should also be removed +# from vcpkg.json to prevent needlessly installing it.. find_package(OpenSSL REQUIRED) set(EXTENSION_NAME ${TARGET_NAME}_extension) @@ -33,7 +30,7 @@ include_directories(duckpgq/include) add_subdirectory(duckpgq/src) include_directories(../duckdb-pgq/third_party/libpg_query/include) -#set(EXTENSION_SOURCES duckpgq/src/duckpgq_extension.cpp) +# set(EXTENSION_SOURCES duckpgq/src/duckpgq_extension.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) @@ -41,7 +38,7 @@ build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link OpenSSL in both the static library as the loadable extension target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) -#>>>>>>> template/main +# >>>>>>> template/main install( TARGETS ${EXTENSION_NAME} diff --git a/Makefile b/Makefile index 98f1215a..8531f624 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ test_release_python: release_python #### Misc format: - find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i + find duckpgq/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i cmake-format -i CMakeLists.txt update: git submodule update --remote --merge diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index c746d3d7..4144d22a 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -16,126 +16,131 @@ #include "duckdb/parser/path_pattern.hpp" namespace duckdb { - struct PGQMatchFunction : public TableFunction { - public: - PGQMatchFunction() { - name = "duckpgq_match"; - bind_replace = MatchBindReplace; - } - - struct MatchBindData : public TableFunctionData { - bool done = false; - }; - - 
- static shared_ptr - FindGraphTable(const string& label, CreatePropertyGraphInfo& pg_table); - - static void - CheckInheritance(const shared_ptr& tableref, - PathElement* element, - vector>& conditions); - - static void - CheckEdgeTableConstraints(const string& src_reference, - const string& dst_reference, - const shared_ptr& edge_table); - - static unique_ptr CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, - const string& vertex_alias, const string& edge_alias); - - static PathElement* - GetPathElement(const unique_ptr& path_reference); - - static unique_ptr - GetCountTable(const shared_ptr& edge_table, - const string& prev_binding); - - static unique_ptr - GetJoinRef(const shared_ptr& edge_table, - const string& edge_binding, const string& prev_binding, - const string& next_binding); - - static unique_ptr CreateCountCTESubquery(); - - static unique_ptr - CreateCSRCTE(const shared_ptr& edge_table, - const string& edge_binding, const string& prev_binding, - const string& next_binding); - - static void EdgeTypeAny(const shared_ptr& edge_table, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeLeft(const shared_ptr& edge_table, - const string& next_table_name, - const string& prev_table_name, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeRight(const shared_ptr& edge_table, - const string& next_table_name, - const string& prev_table_name, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeLeftRight( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions, - unordered_map& alias_map, int32_t& extra_alias_counter); - - static PathElement* - HandleNestedSubPath(unique_ptr& path_reference, - vector>& 
conditions, - idx_t element_idx); - - static unique_ptr MatchBindReplace(ClientContext& context, - TableFunctionBindInput& input); - - static unique_ptr GenerateSubpathPatternSubquery( - unique_ptr& path_pattern, CreatePropertyGraphInfo* pg_table, - vector>& column_list, - unordered_set& named_subpaths); - - static unique_ptr - CreatePathFindingFunction(vector> &path_list, CreatePropertyGraphInfo &pg_table); - - - static void AddPathFinding(const unique_ptr& select_node, - unique_ptr& from_clause, - vector>& conditions, - const string& prev_binding, const string& edge_binding, const string& next_binding, - const shared_ptr& edge_table, - const SubPath* subpath); - - static void AddEdgeJoins(const unique_ptr& select_node, - const shared_ptr& edge_table, - const shared_ptr& previous_vertex_table, - const shared_ptr& next_vertex_table, - PGQMatchType edge_type, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions, - unordered_map& alias_map, - int32_t& extra_alias_counter); - - static void ProcessPathList(vector>& path_pattern, - vector>& conditions, - unique_ptr& from_clause, unique_ptr& select_node, - unordered_map& alias_map, - CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, - vector>& column_list); - - static void CheckNamedSubpath(SubPath &subpath, vector>& column_list, - CreatePropertyGraphInfo &pg_table); - }; +struct PGQMatchFunction : public TableFunction { +public: + PGQMatchFunction() { + name = "duckpgq_match"; + bind_replace = MatchBindReplace; + } + + struct MatchBindData : public TableFunctionData { + bool done = false; + }; + + static shared_ptr + FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); + + static void + CheckInheritance(const shared_ptr &tableref, + PathElement *element, + vector> &conditions); + + static void + CheckEdgeTableConstraints(const string &src_reference, + const string &dst_reference, + const shared_ptr &edge_table); + + static 
unique_ptr CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string &vertex_alias, const string &edge_alias); + + static PathElement * + GetPathElement(const unique_ptr &path_reference); + + static unique_ptr + GetCountTable(const shared_ptr &edge_table, + const string &prev_binding); + + static unique_ptr + GetJoinRef(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding); + + static unique_ptr CreateCountCTESubquery(); + + static unique_ptr + CreateCSRCTE(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding); + + static void EdgeTypeAny(const shared_ptr &edge_table, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void EdgeTypeLeft(const shared_ptr &edge_table, + const string &next_table_name, + const string &prev_table_name, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void EdgeTypeRight(const shared_ptr &edge_table, + const string &next_table_name, + const string &prev_table_name, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void + EdgeTypeLeftRight(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter); + + static PathElement * + HandleNestedSubPath(unique_ptr &path_reference, + vector> &conditions, + idx_t element_idx); + + static unique_ptr MatchBindReplace(ClientContext &context, + TableFunctionBindInput &input); + + static unique_ptr GenerateSubpathPatternSubquery( + unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, + vector> &column_list, + unordered_set &named_subpaths); + + static unique_ptr + 
CreatePathFindingFunction(vector> &path_list, + CreatePropertyGraphInfo &pg_table); + + static void AddPathFinding(const unique_ptr &select_node, + unique_ptr &from_clause, + vector> &conditions, + const string &prev_binding, + const string &edge_binding, + const string &next_binding, + const shared_ptr &edge_table, + const SubPath *subpath); + + static void + AddEdgeJoins(const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter); + + static void ProcessPathList( + vector> &path_pattern, + vector> &conditions, + unique_ptr &from_clause, unique_ptr &select_node, + unordered_map &alias_map, + CreatePropertyGraphInfo &pg_table, int32_t &extra_alias_counter, + vector> &column_list); + + static void + CheckNamedSubpath(SubPath &subpath, + vector> &column_list, + CreatePropertyGraphInfo &pg_table); +}; } // namespace duckdb diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp index d9f466ff..89f2a2ee 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp @@ -33,13 +33,13 @@ struct CSRScanPtrData : public TableFunctionData { public: static unique_ptr ScanCSRPtrBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { + vector &return_types, vector &names) { auto result = make_uniq(); result->csr_id = input.inputs[0].GetValue(); return_types.emplace_back(LogicalType::UBIGINT); names.emplace_back("ptr"); return std::move(result); - } + } public: int32_t csr_id; diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp 
b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp index 6c219f19..f84fc1af 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp @@ -94,9 +94,9 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind( CheckPropertyGraphTableLabels(vertex_table, table); v_table_names.insert(vertex_table->table_name); - if (vertex_table->hasTableNameAlias()) { - v_table_names.insert(vertex_table->table_name_alias); - } + if (vertex_table->hasTableNameAlias()) { + v_table_names.insert(vertex_table->table_name_alias); + } } for (auto &edge_table : info->edge_tables) { diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 860f1f79..61fcec74 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -24,951 +24,987 @@ #include - namespace duckdb { - shared_ptr - PGQMatchFunction::FindGraphTable(const string& label, - CreatePropertyGraphInfo& pg_table) { - const auto graph_table_entry = pg_table.label_map.find(label); - if (graph_table_entry == pg_table.label_map.end()) { - throw BinderException("The label %s is not registered in property graph %s", - label, pg_table.property_graph_name); - } - - return graph_table_entry->second; - } - - void PGQMatchFunction::CheckInheritance( - const shared_ptr& tableref, PathElement* element, - vector>& conditions) { - if (tableref->main_label == element->label) { - return; - } - auto constant_expression_two = - make_uniq(Value::INTEGER((int32_t) 2)); - const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), - element->label); - - const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); - auto constant_expression_idx_label = - make_uniq(Value::INTEGER(static_cast(idx_of_element))); - - vector> power_of_children; - 
power_of_children.push_back(std::move(constant_expression_two)); - power_of_children.push_back(std::move(constant_expression_idx_label)); - auto power_of_term = - make_uniq("power", std::move(power_of_children)); - auto bigint_cast = - make_uniq(LogicalType::BIGINT, std::move(power_of_term)); - auto subcategory_colref = make_uniq( - tableref->discriminator, element->variable_binding); - vector> and_children; - and_children.push_back(std::move(subcategory_colref)); - and_children.push_back(std::move(bigint_cast)); - - auto and_expression = - make_uniq("&", std::move(and_children)); - - auto constant_expression_idx_label_comparison = make_uniq( - Value::INTEGER(static_cast(idx_of_element + 1))); - - auto subset_compare = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(and_expression), - std::move(constant_expression_idx_label_comparison)); - conditions.push_back(std::move(subset_compare)); - } - - void PGQMatchFunction::CheckEdgeTableConstraints( - const string& src_reference, const string& dst_reference, - const shared_ptr& edge_table) { - if (src_reference != edge_table->source_reference) { - throw BinderException("Label %s is not registered as a source reference " - "for edge pattern of table %s", - src_reference, edge_table->table_name); - } - if (dst_reference != edge_table->destination_reference) { - throw BinderException("Label %s is not registered as a destination " - "reference for edge pattern of table %s", - src_reference, edge_table->table_name); - } - } - - unique_ptr PGQMatchFunction::CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, - const string& vertex_alias, const string& edge_alias) { - vector> conditions; - - if (vertex_keys.size() != edge_keys.size()) { - throw BinderException("Vertex columns and edge columns size mismatch"); - } - for (idx_t i = 0; i < vertex_keys.size(); i++) { - auto vertex_colref = - make_uniq(vertex_keys[i], vertex_alias); - auto edge_colref = make_uniq(edge_keys[i], edge_alias); - 
conditions.push_back(make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), - std::move(edge_colref))); - } - unique_ptr where_clause; - - for (auto& condition: conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - - return where_clause; - } - - PathElement* PGQMatchFunction::GetPathElement( - const unique_ptr& path_reference) { - if (path_reference->path_reference_type == - PGQPathReferenceType::PATH_ELEMENT) { - return reinterpret_cast(path_reference.get()); - } - if (path_reference->path_reference_type == - PGQPathReferenceType::SUBPATH) { - return nullptr; - } - throw InternalException("Unknown path reference type detected"); - } - - unique_ptr - PGQMatchFunction::GetCountTable(const shared_ptr& edge_table, - const string& prev_binding) { - // SELECT count(s.id) FROM src s - auto select_count = make_uniq(); - auto select_inner = make_uniq(); - auto ref = make_uniq(); - - ref->table_name = edge_table->source_reference; - ref->alias = prev_binding; - select_inner->from_table = std::move(ref); - vector> children; - children.push_back( - make_uniq(edge_table->source_pk[0], prev_binding)); - - auto count_function = - make_uniq("count", std::move(children)); - select_inner->select_list.push_back(std::move(count_function)); - select_count->node = std::move(select_inner); - auto result = make_uniq(); - result->subquery = std::move(select_count); - result->subquery_type = SubqueryType::SCALAR; - return result; - } - - unique_ptr - PGQMatchFunction::GetJoinRef(const shared_ptr& edge_table, - const string& edge_binding, - const string& prev_binding, - const string& next_binding) { - auto first_join_ref = make_uniq(JoinRefType::REGULAR); - first_join_ref->type = JoinType::INNER; - - auto second_join_ref = make_uniq(JoinRefType::REGULAR); - second_join_ref->type = JoinType::INNER; - - auto edge_base_ref = 
make_uniq(); - edge_base_ref->table_name = edge_table->table_name; - edge_base_ref->alias = edge_binding; - auto src_base_ref = make_uniq(); - src_base_ref->table_name = edge_table->source_reference; - src_base_ref->alias = prev_binding; - second_join_ref->left = std::move(edge_base_ref); - second_join_ref->right = std::move(src_base_ref); - auto t_from_ref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto src_cid_ref = - make_uniq(edge_table->source_pk[0], prev_binding); - second_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), - std::move(src_cid_ref)); - auto dst_base_ref = make_uniq(); - dst_base_ref->table_name = edge_table->destination_reference; - dst_base_ref->alias = next_binding; - first_join_ref->left = std::move(second_join_ref); - first_join_ref->right = std::move(dst_base_ref); - - auto t_to_ref = make_uniq(edge_table->destination_fk[0], - edge_binding); - auto dst_cid_ref = make_uniq( - edge_table->destination_pk[0], next_binding); - first_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), - std::move(dst_cid_ref)); - return first_join_ref; - } - - unique_ptr PGQMatchFunction::CreateCountCTESubquery() { - //! 
BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x - - auto temp_cte_select_node = make_uniq(); - - auto cte_table_ref = make_uniq(); - - cte_table_ref->table_name = "cte1"; - temp_cte_select_node->from_table = std::move(cte_table_ref); - vector> children; - children.push_back(make_uniq("temp", "cte1")); - - auto count_function = - make_uniq("count", std::move(children)); - - auto zero = make_uniq(Value::INTEGER((int32_t) 0)); - - vector> multiply_children; - - multiply_children.push_back(std::move(zero)); - multiply_children.push_back(std::move(count_function)); - auto multiply_function = - make_uniq("multiply", std::move(multiply_children)); - multiply_function->alias = "temp"; - temp_cte_select_node->select_list.push_back(std::move(multiply_function)); - auto temp_cte_select_statement = make_uniq(); - temp_cte_select_statement->node = std::move(temp_cte_select_node); - - auto temp_cte_select_subquery = - make_uniq(std::move(temp_cte_select_statement), "__x"); - //! END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x - return temp_cte_select_subquery; - } - - unique_ptr - PGQMatchFunction::CreateCSRCTE(const shared_ptr& edge_table, - const string& prev_binding, - const string& edge_binding, - const string& next_binding) { - auto csr_edge_id_constant = - make_uniq(Value::INTEGER(0)); - auto count_create_edge_select = GetCountTable(edge_table, prev_binding); - - auto cast_subquery_expr = make_uniq(); - auto cast_select_node = make_uniq(); - - vector> csr_vertex_children; - csr_vertex_children.push_back( - make_uniq(Value::INTEGER(0))); - - auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); - - csr_vertex_children.push_back(std::move(count_create_vertex_expr)); - - csr_vertex_children.push_back( - make_uniq("dense_id", "sub")); - csr_vertex_children.push_back(make_uniq("cnt", "sub")); - - auto create_vertex_function = make_uniq( - "create_csr_vertex", std::move(csr_vertex_children)); - vector> sum_children; - 
sum_children.push_back(std::move(create_vertex_function)); - auto sum_function = - make_uniq("sum", std::move(sum_children)); - - auto inner_select_statement = make_uniq(); - auto inner_select_node = make_uniq(); - - auto source_rowid_colref = - make_uniq("rowid", prev_binding); - source_rowid_colref->alias = "dense_id"; - - auto count_create_inner_expr = make_uniq(); - count_create_inner_expr->subquery_type = SubqueryType::SCALAR; - auto edge_src_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - vector> inner_count_children; - inner_count_children.push_back(std::move(edge_src_colref)); - auto inner_count_function = - make_uniq("count", std::move(inner_count_children)); - inner_count_function->alias = "cnt"; - - inner_select_node->select_list.push_back(std::move(source_rowid_colref)); - inner_select_node->select_list.push_back(std::move(inner_count_function)); - auto source_rowid_colref_1 = - make_uniq("rowid", prev_binding); - expression_map_t grouping_expression_map; - inner_select_node->groups.group_expressions.push_back( - std::move(source_rowid_colref_1)); - GroupingSet grouping_set = {0}; - inner_select_node->groups.grouping_sets.push_back(grouping_set); - - auto inner_join_ref = make_uniq(JoinRefType::REGULAR); - inner_join_ref->type = JoinType::LEFT; - auto left_base_ref = make_uniq(); - left_base_ref->table_name = edge_table->source_reference; - left_base_ref->alias = prev_binding; - auto right_base_ref = make_uniq(); - right_base_ref->table_name = edge_table->table_name; - right_base_ref->alias = edge_binding; - inner_join_ref->left = std::move(left_base_ref); - inner_join_ref->right = std::move(right_base_ref); - - auto edge_join_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto vertex_join_colref = - make_uniq(edge_table->source_pk[0], prev_binding); - - inner_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), - std::move(vertex_join_colref)); - inner_select_node->from_table = 
std::move(inner_join_ref); - inner_select_statement->node = std::move(inner_select_node); - - auto inner_from_subquery = - make_uniq(std::move(inner_select_statement), "sub"); - - cast_select_node->from_table = std::move(inner_from_subquery); - - cast_select_node->select_list.push_back(std::move(sum_function)); - auto cast_select_stmt = make_uniq(); - cast_select_stmt->node = std::move(cast_select_node); - cast_subquery_expr->subquery = std::move(cast_select_stmt); - cast_subquery_expr->subquery_type = SubqueryType::SCALAR; - - auto src_rowid_colref = make_uniq("rowid", prev_binding); - auto dst_rowid_colref = make_uniq("rowid", next_binding); - auto edge_rowid_colref = - make_uniq("rowid", edge_binding); - - auto cast_expression = make_uniq( - LogicalType::BIGINT, std::move(cast_subquery_expr)); - - vector> csr_edge_children; - csr_edge_children.push_back(std::move(csr_edge_id_constant)); - csr_edge_children.push_back(std::move(count_create_edge_select)); - csr_edge_children.push_back(std::move(cast_expression)); - csr_edge_children.push_back(std::move(src_rowid_colref)); - csr_edge_children.push_back(std::move(dst_rowid_colref)); - csr_edge_children.push_back(std::move(edge_rowid_colref)); - - auto outer_select_node = make_uniq(); - - auto create_csr_edge_function = make_uniq( - "create_csr_edge", std::move(csr_edge_children)); - create_csr_edge_function->alias = "temp"; - - outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); - outer_select_node->from_table = - GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); - auto outer_select_statement = make_uniq(); - - outer_select_statement->node = std::move(outer_select_node); - auto info = make_uniq(); - info->query = std::move(outer_select_statement); - return info; - } - - void PGQMatchFunction::EdgeTypeAny( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - // (a) src.key = edge.src 
- auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, - prev_binding, edge_binding); - // (b) dst.key = edge.dst - auto dst_left_expr = CreateMatchJoinExpression( - edge_table->destination_pk, edge_table->destination_fk, - next_binding, edge_binding); - // (a) AND (b) - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - // (c) src.key = edge.dst - auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - // (d) dst.key = edge.src - auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->source_fk, - next_binding, edge_binding); - // (c) AND (d) - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - // ((a) AND (b)) OR ((c) AND (d)) - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); - } - - void PGQMatchFunction::EdgeTypeLeft( - const shared_ptr& edge_table, const string& next_table_name, - const string& prev_table_name, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - next_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding)); - } - - void PGQMatchFunction::EdgeTypeRight( - const shared_ptr& edge_table, const string& next_table_name, - const string& prev_table_name, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - 
CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - prev_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - next_binding, edge_binding)); - } - - void PGQMatchFunction::EdgeTypeLeftRight( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions, - unordered_map& alias_map, int32_t& extra_alias_counter) { - auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); - auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - - const auto additional_edge_alias = - edge_binding + std::to_string(extra_alias_counter); - extra_alias_counter++; - - alias_map[additional_edge_alias] = edge_table->table_name; - - auto src_right_expr = - CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, - prev_binding, additional_edge_alias); - auto dst_right_expr = CreateMatchJoinExpression( - edge_table->destination_pk, edge_table->destination_fk, next_binding, - additional_edge_alias); - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); - } - - PathElement* PGQMatchFunction::HandleNestedSubPath( - unique_ptr& path_reference, - vector>& conditions, idx_t element_idx) { - auto subpath = reinterpret_cast(path_reference.get()); - return 
GetPathElement(subpath->path_list[element_idx]); - } - - unique_ptr - CreateWhereClause(vector>& conditions) { - unique_ptr where_clause; - for (auto& condition: conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - return where_clause; - } - - unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector>& path_list, CreatePropertyGraphInfo& pg_table) { - // This method will return a SubqueryRef of a list of rowids - // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend - // The difficulty is that there may be a (un)bounded path pattern at some point in the query - // This is computed using the shortestpath() UDF and returns a list. - // This list will be part of the full list of element rowids, using list_concat. - // For now we will only support returning rowids - unique_ptr final_list; - - auto previous_vertex_element = GetPathElement(path_list[0]); - if (!previous_vertex_element) { - // We hit a vertex element with a WHERE, but we only care about the rowid here - auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); - previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); - } - - for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { - auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); - next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); - } - - auto edge_element = GetPathElement(path_list[idx_i]); - if (!edge_element) { - auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); - if (edge_subpath->upper > 1) { - // (un)bounded shortest path - // Add the shortest path UDF - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = 
FindGraphTable(edge_element->label, pg_table); - auto src_row_id = make_uniq("rowid", previous_vertex_element->variable_binding); - auto dst_row_id = make_uniq("rowid", next_vertex_element->variable_binding); - auto csr_id = make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back( - std::move(GetCountTable(edge_table, previous_vertex_element->variable_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto shortest_path_function = make_uniq("shortestpath", - std::move(pathfinding_children)); - - if (!final_list) { - final_list = std::move(shortest_path_function); - } else { - auto pop_front_shortest_path_children = vector>(); - pop_front_shortest_path_children.push_back(std::move(shortest_path_function)); - auto pop_front = make_uniq("array_pop_front", - std::move(pop_front_shortest_path_children)); - - auto final_list_children = vector>(); - final_list_children.push_back(std::move(final_list)); - final_list_children.push_back(std::move(pop_front)); - final_list = make_uniq("list_concat", std::move(final_list_children)); - } - // Set next vertex to be previous - previous_vertex_element = next_vertex_element; - continue; - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - } - auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); - auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); - auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); - auto starting_list_children = vector>(); - - if (!final_list) { - starting_list_children.push_back(std::move(previous_rowid)); - starting_list_children.push_back(std::move(edge_rowid)); - starting_list_children.push_back(std::move(next_rowid)); - final_list = make_uniq("list_value", std::move(starting_list_children)); - } else { - 
starting_list_children.push_back(std::move(edge_rowid)); - starting_list_children.push_back(std::move(next_rowid)); - auto next_elements_list = make_uniq("list_value", std::move(starting_list_children)); - auto final_list_children = vector>(); - final_list_children.push_back(std::move(final_list)); - final_list_children.push_back(std::move(next_elements_list)); - final_list = make_uniq("list_concat", std::move(final_list_children)); - } - previous_vertex_element = next_vertex_element; - } - - return final_list; - } - - void PGQMatchFunction::AddEdgeJoins(const unique_ptr& select_node, - const shared_ptr& edge_table, - const shared_ptr& previous_vertex_table, - const shared_ptr& next_vertex_table, - PGQMatchType edge_type, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions, - unordered_map& alias_map, - int32_t& extra_alias_counter) { - switch (edge_type) { - case PGQMatchType::MATCH_EDGE_ANY: { - select_node->modifiers.push_back(make_uniq()); - EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions); - break; - } - case PGQMatchType::MATCH_EDGE_LEFT: - EdgeTypeLeft(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_RIGHT: - EdgeTypeRight(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - EdgeTypeLeftRight(edge_table, edge_binding, - prev_binding, next_binding, conditions, - alias_map, extra_alias_counter); - break; - } - default: - throw InternalException("Unknown match type found"); - } - } - - void PGQMatchFunction::AddPathFinding(const unique_ptr& select_node, - unique_ptr& from_clause, - vector>& conditions, - const string& prev_binding, const string& edge_binding, - const string& next_binding, - const shared_ptr& 
edge_table, - const SubPath* subpath) { - //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - select_node->cte_map.map["cte1"] = CreateCSRCTE( - edge_table, prev_binding, - edge_binding, - next_binding); - - auto temp_cte_select_subquery = CreateCountCTESubquery(); - - if (from_clause) { - // create a cross join since there is already something in the - // from clause - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(from_clause); - from_join->right = std::move(temp_cte_select_subquery); - from_clause = std::move(from_join); - } else { - from_clause = std::move(temp_cte_select_subquery); - } - //! END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - - //! START - //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) - //! from dst c, a.rowid, b.rowid) between lower and upper - - auto src_row_id = make_uniq( - "rowid", prev_binding); - auto dst_row_id = make_uniq( - "rowid", next_binding); - auto csr_id = - make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back(std::move(GetCountTable( - edge_table, prev_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto reachability_function = make_uniq( - "iterativelength", std::move(pathfinding_children)); - - auto cte_col_ref = make_uniq("temp", "__x"); - - vector> addition_children; - addition_children.push_back(std::move(cte_col_ref)); - addition_children.push_back(std::move(reachability_function)); - - auto addition_function = make_uniq( - "add", std::move(addition_children)); - auto lower_limit = - make_uniq(Value::INTEGER(static_cast(subpath->lower))); - auto upper_limit = - make_uniq(Value::INTEGER(static_cast(subpath->upper))); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - 
conditions.push_back(std::move(between_expression)); - - //! END - //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! from src s, a.rowid, b.rowid) between lower and upper - } - - void PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, - CreatePropertyGraphInfo& pg_table) { - for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { - FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); - if (parsed_ref == nullptr) { - continue; - } - auto column_ref = dynamic_cast(parsed_ref->children[0].get()); - if (column_ref == nullptr) { - continue; - } - // Trying to check parsed_ref->alias directly leads to a segfault - string column_alias = parsed_ref->alias; - - if (column_ref->column_names[0] != subpath.path_variable) { - continue; - } - if (parsed_ref->function_name == "element_id") { - // Check subpath name matches the column referenced in the function --> element_id(named_subpath) - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - - if (column_alias.empty()) { - shortest_path_function->alias = "element_id(" + subpath.path_variable + ")"; - } else { - shortest_path_function->alias = column_alias; - } - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); - } else if (parsed_ref->function_name == "path_length") { - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - auto path_len_children = vector>(); - path_len_children.push_back(std::move(shortest_path_function)); - auto path_len = - make_uniq("len", std::move(path_len_children)); - auto constant_two = make_uniq(Value::INTEGER(2)); - vector> div_children; - div_children.push_back(std::move(path_len)); - div_children.push_back(std::move(constant_two)); - auto path_length_function = - make_uniq("//", std::move(div_children)); - path_length_function->alias = column_alias.empty() ? 
"path_length(" + subpath.path_variable + ")" : column_alias; - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); - } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - auto list_slice_children = vector>(); - list_slice_children.push_back(std::move(shortest_path_function)); - - if (parsed_ref->function_name == "vertices") { - list_slice_children.push_back(make_uniq(Value::INTEGER(1))); - } else { - list_slice_children.push_back(make_uniq(Value::INTEGER(2))); - } - auto slice_end = make_uniq(Value::INTEGER(-1)); - auto slice_step = make_uniq(Value::INTEGER(2)); - - list_slice_children.push_back(std::move(slice_end)); - list_slice_children.push_back(std::move(slice_step)); - auto list_slice = - make_uniq("list_slice", std::move(list_slice_children)); - if (parsed_ref->function_name == "vertices") { - list_slice->alias = column_alias.empty() ? "vertices(" + subpath.path_variable + ")" : column_alias; - } else { - list_slice->alias = column_alias.empty() ? 
"edges(" + subpath.path_variable + ")" : column_alias; - } - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); - } - } - } - - void PGQMatchFunction::ProcessPathList(vector>& path_list, - vector>& conditions, - unique_ptr& from_clause, unique_ptr& select_node, - unordered_map& alias_map, - CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, - vector>& column_list) { - PathElement* previous_vertex_element = - GetPathElement(path_list[0]); - if (!previous_vertex_element) { - const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); - CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); - if (previous_vertex_subpath->where_clause) { - conditions.push_back(std::move(previous_vertex_subpath->where_clause)); - } - if (previous_vertex_subpath->path_list.size() == 1) { - previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); - } else { - // Add the shortest path if the name is found in the column_list - ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, - alias_map, pg_table, extra_alias_counter, column_list); - return; - } - } - auto previous_vertex_table = - FindGraphTable(previous_vertex_element->label, pg_table); - CheckInheritance(previous_vertex_table, previous_vertex_element, - conditions); - alias_map[previous_vertex_element->variable_binding] = - previous_vertex_table->table_name; - - for (idx_t idx_j = 1; - idx_j < path_list.size(); - idx_j = idx_j + 2) { - PathElement* next_vertex_element = - GetPathElement(path_list[idx_j + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = - reinterpret_cast(path_list[idx_j + 1].get()); - if (next_vertex_subpath->path_list.size() > 1) { - throw NotImplementedException("Recursive patterns are not yet supported."); - } - if (next_vertex_subpath->where_clause) { - conditions.push_back(std::move(next_vertex_subpath->where_clause)); - } - 
next_vertex_element = - GetPathElement(next_vertex_subpath->path_list[0]); - } - if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - throw BinderException("Vertex and edge patterns must be alternated."); - } - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, pg_table); - CheckInheritance(next_vertex_table, next_vertex_element, conditions); - alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; - - PathElement* edge_element = - GetPathElement(path_list[idx_j]); - if (!edge_element) { - // We are dealing with a subpath - auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); - if (edge_subpath->where_clause) { - conditions.push_back(std::move(edge_subpath->where_clause)); - } - if (edge_subpath->path_list.size() > 1) { - throw NotImplementedException("Subpath on an edge is not yet supported."); - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = FindGraphTable(edge_element->label, pg_table); - - if (edge_subpath->upper > 1) { - // Add the path-finding - AddPathFinding(select_node, from_clause, conditions, - previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding, - edge_table, edge_subpath); - } else { - alias_map[edge_element->variable_binding] = edge_table->source_reference; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, - edge_element->variable_binding, previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); - } - } else { - // The edge element is a path element without WHERE or path-finding. 
- auto edge_table = FindGraphTable(edge_element->label, pg_table); - CheckInheritance(edge_table, edge_element, conditions); - // check aliases - alias_map[edge_element->variable_binding] = edge_table->table_name; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, edge_element->variable_binding, - previous_vertex_element->variable_binding, next_vertex_element->variable_binding, - conditions, alias_map, extra_alias_counter); - // Check the edge type - // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id - // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id - // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR - // (b.dst = a.id AND b.src - // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND - // (b.dst = a.id AND b.src - //= c.id) - } - previous_vertex_element = next_vertex_element; - previous_vertex_table = next_vertex_table; - } - } - - - unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext& context, - TableFunctionBindInput&) { - auto duckpgq_state_entry = context.registered_state.find("duckpgq"); - auto duckpgq_state = dynamic_cast(duckpgq_state_entry->second.get()); - - auto ref = dynamic_cast( - duckpgq_state->transform_expression.get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); - - auto data = make_uniq(); - - vector> conditions; - - auto select_node = make_uniq(); - unordered_map alias_map; - unique_ptr from_clause; - - int32_t extra_alias_counter = 0; - for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { - auto& path_pattern = ref->path_patterns[idx_i]; - // Check if the element is PathElement or a Subpath with potentially many items - ProcessPathList(path_pattern->path_elements, conditions, from_clause, select_node, - alias_map, *pg_table, extra_alias_counter, ref->column_list); - } - - // Go through all aliases encountered - for (auto& table_alias_entry: alias_map) { - auto table_ref = make_uniq(); - table_ref->table_name = 
table_alias_entry.second; - table_ref->alias = table_alias_entry.first; - - if (from_clause) { - auto new_root = make_uniq(JoinRefType::CROSS); - new_root->left = std::move(from_clause); - new_root->right = std::move(table_ref); - from_clause = std::move(new_root); - } else { - from_clause = std::move(table_ref); - } - } - - select_node->from_table = std::move(from_clause); - - if (ref->where_clause) { - conditions.push_back(std::move(ref->where_clause)); - } - std::vector> final_column_list; - - for (auto& expression: ref->column_list) { - unordered_set named_subpaths; - auto column_ref = dynamic_cast(expression.get()); - if (column_ref != nullptr) { - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - final_column_list.emplace_back(make_uniq( - "path", column_ref->column_names[0])); - } else { - final_column_list.push_back(std::move(expression)); - } - continue; - } - auto function_ref = dynamic_cast(expression.get()); - if (function_ref != nullptr) { - if (function_ref->function_name == "path_length") { - column_ref = dynamic_cast( - function_ref->children[0].get()); - if (column_ref == nullptr) { - continue; - } - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - auto path_ref = make_uniq( - "path", column_ref->column_names[0]); - vector> path_children; - path_children.push_back(std::move(path_ref)); - auto path_len = - make_uniq("len", std::move(path_children)); - auto constant_two = make_uniq(Value::INTEGER(2)); - vector> div_children; - div_children.push_back(std::move(path_len)); - div_children.push_back(std::move(constant_two)); - auto div_expression = - make_uniq("//", std::move(div_children)); - div_expression->alias = - "path_length_" + column_ref->column_names[0]; - final_column_list.emplace_back(std::move(div_expression)); - } - } else { - final_column_list.push_back(std::move(expression)); - } - - continue; - } - - 
final_column_list.push_back(std::move(expression)); - } - - select_node->where_clause = CreateWhereClause(conditions); - select_node->select_list = std::move(final_column_list); - - auto subquery = make_uniq(); - subquery->node = std::move(select_node); - - auto result = make_uniq(std::move(subquery), ref->alias); - - return std::move(result); - } +shared_ptr +PGQMatchFunction::FindGraphTable(const string &label, + CreatePropertyGraphInfo &pg_table) { + const auto graph_table_entry = pg_table.label_map.find(label); + if (graph_table_entry == pg_table.label_map.end()) { + throw BinderException("The label %s is not registered in property graph %s", + label, pg_table.property_graph_name); + } + + return graph_table_entry->second; +} + +void PGQMatchFunction::CheckInheritance( + const shared_ptr &tableref, PathElement *element, + vector> &conditions) { + if (tableref->main_label == element->label) { + return; + } + auto constant_expression_two = + make_uniq(Value::INTEGER((int32_t)2)); + const auto itr = std::find(tableref->sub_labels.begin(), + tableref->sub_labels.end(), element->label); + + const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); + auto constant_expression_idx_label = make_uniq( + Value::INTEGER(static_cast(idx_of_element))); + + vector> power_of_children; + power_of_children.push_back(std::move(constant_expression_two)); + power_of_children.push_back(std::move(constant_expression_idx_label)); + auto power_of_term = + make_uniq("power", std::move(power_of_children)); + auto bigint_cast = + make_uniq(LogicalType::BIGINT, std::move(power_of_term)); + auto subcategory_colref = make_uniq( + tableref->discriminator, element->variable_binding); + vector> and_children; + and_children.push_back(std::move(subcategory_colref)); + and_children.push_back(std::move(bigint_cast)); + + auto and_expression = + make_uniq("&", std::move(and_children)); + + auto constant_expression_idx_label_comparison = make_uniq( + 
Value::INTEGER(static_cast(idx_of_element + 1))); + + auto subset_compare = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(and_expression), + std::move(constant_expression_idx_label_comparison)); + conditions.push_back(std::move(subset_compare)); +} + +void PGQMatchFunction::CheckEdgeTableConstraints( + const string &src_reference, const string &dst_reference, + const shared_ptr &edge_table) { + if (src_reference != edge_table->source_reference) { + throw BinderException("Label %s is not registered as a source reference " + "for edge pattern of table %s", + src_reference, edge_table->table_name); + } + if (dst_reference != edge_table->destination_reference) { + throw BinderException("Label %s is not registered as a destination " + "reference for edge pattern of table %s", + src_reference, edge_table->table_name); + } +} + +unique_ptr PGQMatchFunction::CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string &vertex_alias, const string &edge_alias) { + vector> conditions; + + if (vertex_keys.size() != edge_keys.size()) { + throw BinderException("Vertex columns and edge columns size mismatch"); + } + for (idx_t i = 0; i < vertex_keys.size(); i++) { + auto vertex_colref = + make_uniq(vertex_keys[i], vertex_alias); + auto edge_colref = make_uniq(edge_keys[i], edge_alias); + conditions.push_back(make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), + std::move(edge_colref))); + } + unique_ptr where_clause; + + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + + return where_clause; +} + +PathElement *PGQMatchFunction::GetPathElement( + const unique_ptr &path_reference) { + if (path_reference->path_reference_type == + PGQPathReferenceType::PATH_ELEMENT) { + return reinterpret_cast(path_reference.get()); + } + if 
(path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return nullptr; + } + throw InternalException("Unknown path reference type detected"); +} + +unique_ptr PGQMatchFunction::GetCountTable( + const shared_ptr &edge_table, + const string &prev_binding) { + // SELECT count(s.id) FROM src s + auto select_count = make_uniq(); + auto select_inner = make_uniq(); + auto ref = make_uniq(); + + ref->table_name = edge_table->source_reference; + ref->alias = prev_binding; + select_inner->from_table = std::move(ref); + vector> children; + children.push_back( + make_uniq(edge_table->source_pk[0], prev_binding)); + + auto count_function = + make_uniq("count", std::move(children)); + select_inner->select_list.push_back(std::move(count_function)); + select_count->node = std::move(select_inner); + auto result = make_uniq(); + result->subquery = std::move(select_count); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + +unique_ptr +PGQMatchFunction::GetJoinRef(const shared_ptr &edge_table, + const string &edge_binding, + const string &prev_binding, + const string &next_binding) { + auto first_join_ref = make_uniq(JoinRefType::REGULAR); + first_join_ref->type = JoinType::INNER; + + auto second_join_ref = make_uniq(JoinRefType::REGULAR); + second_join_ref->type = JoinType::INNER; + + auto edge_base_ref = make_uniq(); + edge_base_ref->table_name = edge_table->table_name; + edge_base_ref->alias = edge_binding; + auto src_base_ref = make_uniq(); + src_base_ref->table_name = edge_table->source_reference; + src_base_ref->alias = prev_binding; + second_join_ref->left = std::move(edge_base_ref); + second_join_ref->right = std::move(src_base_ref); + auto t_from_ref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto src_cid_ref = + make_uniq(edge_table->source_pk[0], prev_binding); + second_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), + std::move(src_cid_ref)); + auto dst_base_ref = make_uniq(); + 
dst_base_ref->table_name = edge_table->destination_reference; + dst_base_ref->alias = next_binding; + first_join_ref->left = std::move(second_join_ref); + first_join_ref->right = std::move(dst_base_ref); + + auto t_to_ref = make_uniq(edge_table->destination_fk[0], + edge_binding); + auto dst_cid_ref = make_uniq( + edge_table->destination_pk[0], next_binding); + first_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), + std::move(dst_cid_ref)); + return first_join_ref; +} + +unique_ptr PGQMatchFunction::CreateCountCTESubquery() { + //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x + + auto temp_cte_select_node = make_uniq(); + + auto cte_table_ref = make_uniq(); + + cte_table_ref->table_name = "cte1"; + temp_cte_select_node->from_table = std::move(cte_table_ref); + vector> children; + children.push_back(make_uniq("temp", "cte1")); + + auto count_function = + make_uniq("count", std::move(children)); + + auto zero = make_uniq(Value::INTEGER((int32_t)0)); + + vector> multiply_children; + + multiply_children.push_back(std::move(zero)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = + make_uniq("multiply", std::move(multiply_children)); + multiply_function->alias = "temp"; + temp_cte_select_node->select_list.push_back(std::move(multiply_function)); + auto temp_cte_select_statement = make_uniq(); + temp_cte_select_statement->node = std::move(temp_cte_select_node); + + auto temp_cte_select_subquery = + make_uniq(std::move(temp_cte_select_statement), "__x"); + //! 
END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x + return temp_cte_select_subquery; +} + +unique_ptr +PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, + const string &prev_binding, + const string &edge_binding, + const string &next_binding) { + auto csr_edge_id_constant = make_uniq(Value::INTEGER(0)); + auto count_create_edge_select = GetCountTable(edge_table, prev_binding); + + auto cast_subquery_expr = make_uniq(); + auto cast_select_node = make_uniq(); + + vector> csr_vertex_children; + csr_vertex_children.push_back( + make_uniq(Value::INTEGER(0))); + + auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); + + csr_vertex_children.push_back(std::move(count_create_vertex_expr)); + + csr_vertex_children.push_back( + make_uniq("dense_id", "sub")); + csr_vertex_children.push_back(make_uniq("cnt", "sub")); + + auto create_vertex_function = make_uniq( + "create_csr_vertex", std::move(csr_vertex_children)); + vector> sum_children; + sum_children.push_back(std::move(create_vertex_function)); + auto sum_function = + make_uniq("sum", std::move(sum_children)); + + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + auto source_rowid_colref = + make_uniq("rowid", prev_binding); + source_rowid_colref->alias = "dense_id"; + + auto count_create_inner_expr = make_uniq(); + count_create_inner_expr->subquery_type = SubqueryType::SCALAR; + auto edge_src_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + vector> inner_count_children; + inner_count_children.push_back(std::move(edge_src_colref)); + auto inner_count_function = + make_uniq("count", std::move(inner_count_children)); + inner_count_function->alias = "cnt"; + + inner_select_node->select_list.push_back(std::move(source_rowid_colref)); + inner_select_node->select_list.push_back(std::move(inner_count_function)); + auto source_rowid_colref_1 = + make_uniq("rowid", prev_binding); + expression_map_t grouping_expression_map; + 
inner_select_node->groups.group_expressions.push_back( + std::move(source_rowid_colref_1)); + GroupingSet grouping_set = {0}; + inner_select_node->groups.grouping_sets.push_back(grouping_set); + + auto inner_join_ref = make_uniq(JoinRefType::REGULAR); + inner_join_ref->type = JoinType::LEFT; + auto left_base_ref = make_uniq(); + left_base_ref->table_name = edge_table->source_reference; + left_base_ref->alias = prev_binding; + auto right_base_ref = make_uniq(); + right_base_ref->table_name = edge_table->table_name; + right_base_ref->alias = edge_binding; + inner_join_ref->left = std::move(left_base_ref); + inner_join_ref->right = std::move(right_base_ref); + + auto edge_join_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto vertex_join_colref = + make_uniq(edge_table->source_pk[0], prev_binding); + + inner_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), + std::move(vertex_join_colref)); + inner_select_node->from_table = std::move(inner_join_ref); + inner_select_statement->node = std::move(inner_select_node); + + auto inner_from_subquery = + make_uniq(std::move(inner_select_statement), "sub"); + + cast_select_node->from_table = std::move(inner_from_subquery); + + cast_select_node->select_list.push_back(std::move(sum_function)); + auto cast_select_stmt = make_uniq(); + cast_select_stmt->node = std::move(cast_select_node); + cast_subquery_expr->subquery = std::move(cast_select_stmt); + cast_subquery_expr->subquery_type = SubqueryType::SCALAR; + + auto src_rowid_colref = make_uniq("rowid", prev_binding); + auto dst_rowid_colref = make_uniq("rowid", next_binding); + auto edge_rowid_colref = + make_uniq("rowid", edge_binding); + + auto cast_expression = make_uniq( + LogicalType::BIGINT, std::move(cast_subquery_expr)); + + vector> csr_edge_children; + csr_edge_children.push_back(std::move(csr_edge_id_constant)); + csr_edge_children.push_back(std::move(count_create_edge_select)); + 
csr_edge_children.push_back(std::move(cast_expression)); + csr_edge_children.push_back(std::move(src_rowid_colref)); + csr_edge_children.push_back(std::move(dst_rowid_colref)); + csr_edge_children.push_back(std::move(edge_rowid_colref)); + + auto outer_select_node = make_uniq(); + + auto create_csr_edge_function = make_uniq( + "create_csr_edge", std::move(csr_edge_children)); + create_csr_edge_function->alias = "temp"; + + outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); + outer_select_node->from_table = + GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); + auto outer_select_statement = make_uniq(); + + outer_select_statement->node = std::move(outer_select_node); + auto info = make_uniq(); + info->query = std::move(outer_select_statement); + return info; +} + +void PGQMatchFunction::EdgeTypeAny( + const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + // (a) src.key = edge.src + auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding); + // (b) dst.key = edge.dst + auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + next_binding, edge_binding); + // (a) AND (b) + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); + // (c) src.key = edge.dst + auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + // (d) dst.key = edge.src + auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->source_fk, + next_binding, edge_binding); + // (c) AND (d) + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + // ((a) AND (b)) OR ((c) AND (d)) + auto combined_expr = 
make_uniq( + ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); +} + +void PGQMatchFunction::EdgeTypeLeft( + const shared_ptr &edge_table, + const string &next_table_name, const string &prev_table_name, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + next_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding)); +} + +void PGQMatchFunction::EdgeTypeRight( + const shared_ptr &edge_table, + const string &next_table_name, const string &prev_table_name, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + prev_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + next_binding, edge_binding)); +} + +void PGQMatchFunction::EdgeTypeLeftRight( + const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, int32_t &extra_alias_counter) { + auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); + auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); 
+ + const auto additional_edge_alias = + edge_binding + std::to_string(extra_alias_counter); + extra_alias_counter++; + + alias_map[additional_edge_alias] = edge_table->table_name; + + auto src_right_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, + prev_binding, additional_edge_alias); + auto dst_right_expr = CreateMatchJoinExpression( + edge_table->destination_pk, edge_table->destination_fk, next_binding, + additional_edge_alias); + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + + auto combined_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); +} + +PathElement *PGQMatchFunction::HandleNestedSubPath( + unique_ptr &path_reference, + vector> &conditions, idx_t element_idx) { + auto subpath = reinterpret_cast(path_reference.get()); + return GetPathElement(subpath->path_list[element_idx]); +} + +unique_ptr +CreateWhereClause(vector> &conditions) { + unique_ptr where_clause; + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + return where_clause; +} + +unique_ptr PGQMatchFunction::CreatePathFindingFunction( + vector> &path_list, + CreatePropertyGraphInfo &pg_table) { + // This method will return a SubqueryRef of a list of rowids + // For every vertex and edge element, we add the rowid to the list using + // list_append, or list_prepend The difficulty is that there may be a + // (un)bounded path pattern at some point in the query This is computed using + // the shortestpath() UDF and returns a list. This list will be part of the + // full list of element rowids, using list_concat. 
For now we will only + // support returning rowids + unique_ptr final_list; + + auto previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + // We hit a vertex element with a WHERE, but we only care about the rowid + // here + auto previous_vertex_subpath = + reinterpret_cast(path_list[0].get()); + previous_vertex_element = + GetPathElement(previous_vertex_subpath->path_list[0]); + } + + for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { + auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_list[idx_i + 1].get()); + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + + auto edge_element = GetPathElement(path_list[idx_i]); + if (!edge_element) { + auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); + if (edge_subpath->upper > 1) { + // (un)bounded shortest path + // Add the shortest path UDF + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + auto src_row_id = make_uniq( + "rowid", previous_vertex_element->variable_binding); + auto dst_row_id = make_uniq( + "rowid", next_vertex_element->variable_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back(std::move(GetCountTable( + edge_table, previous_vertex_element->variable_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto shortest_path_function = make_uniq( + "shortestpath", std::move(pathfinding_children)); + + if (!final_list) { + final_list = std::move(shortest_path_function); + } else { + auto pop_front_shortest_path_children = + vector>(); + pop_front_shortest_path_children.push_back( + std::move(shortest_path_function)); + auto pop_front = make_uniq( 
+ "array_pop_front", std::move(pop_front_shortest_path_children)); + + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(pop_front)); + final_list = make_uniq( + "list_concat", std::move(final_list_children)); + } + // Set next vertex to be previous + previous_vertex_element = next_vertex_element; + continue; + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + } + auto previous_rowid = make_uniq( + "rowid", previous_vertex_element->variable_binding); + auto edge_rowid = + make_uniq("rowid", edge_element->variable_binding); + auto next_rowid = make_uniq( + "rowid", next_vertex_element->variable_binding); + auto starting_list_children = vector>(); + + if (!final_list) { + starting_list_children.push_back(std::move(previous_rowid)); + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + final_list = make_uniq( + "list_value", std::move(starting_list_children)); + } else { + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + auto next_elements_list = make_uniq( + "list_value", std::move(starting_list_children)); + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(next_elements_list)); + final_list = make_uniq( + "list_concat", std::move(final_list_children)); + } + previous_vertex_element = next_vertex_element; + } + + return final_list; +} + +void PGQMatchFunction::AddEdgeJoins( + const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + unordered_map &alias_map, int32_t &extra_alias_counter) { + switch (edge_type) { + case 
PGQMatchType::MATCH_EDGE_ANY: { + select_node->modifiers.push_back(make_uniq()); + EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, + conditions); + break; + } + case PGQMatchType::MATCH_EDGE_LEFT: + EdgeTypeLeft(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, edge_binding, prev_binding, + next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_RIGHT: + EdgeTypeRight(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, edge_binding, prev_binding, + next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + EdgeTypeLeftRight(edge_table, edge_binding, prev_binding, next_binding, + conditions, alias_map, extra_alias_counter); + break; + } + default: + throw InternalException("Unknown match type found"); + } +} + +void PGQMatchFunction::AddPathFinding( + const unique_ptr &select_node, + unique_ptr &from_clause, + vector> &conditions, + const string &prev_binding, const string &edge_binding, + const string &next_binding, + const shared_ptr &edge_table, const SubPath *subpath) { + //! START + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + select_node->cte_map.map["cte1"] = + CreateCSRCTE(edge_table, prev_binding, edge_binding, next_binding); + + auto temp_cte_select_subquery = CreateCountCTESubquery(); + + if (from_clause) { + // create a cross join since there is already something in the + // from clause + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(from_clause); + from_join->right = std::move(temp_cte_select_subquery); + from_clause = std::move(from_join); + } else { + from_clause = std::move(temp_cte_select_subquery); + } + //! END + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + + //! START + //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) + //! 
from dst c, a.rowid, b.rowid) between lower and upper + + auto src_row_id = make_uniq("rowid", prev_binding); + auto dst_row_id = make_uniq("rowid", next_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back( + std::move(GetCountTable(edge_table, prev_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto reachability_function = make_uniq( + "iterativelength", std::move(pathfinding_children)); + + auto cte_col_ref = make_uniq("temp", "__x"); + + vector> addition_children; + addition_children.push_back(std::move(cte_col_ref)); + addition_children.push_back(std::move(reachability_function)); + + auto addition_function = + make_uniq("add", std::move(addition_children)); + auto lower_limit = make_uniq( + Value::INTEGER(static_cast(subpath->lower))); + auto upper_limit = make_uniq( + Value::INTEGER(static_cast(subpath->upper))); + auto between_expression = make_uniq( + std::move(addition_function), std::move(lower_limit), + std::move(upper_limit)); + conditions.push_back(std::move(between_expression)); + + //! END + //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) + //! 
from src s, a.rowid, b.rowid) between lower and upper +} + +void PGQMatchFunction::CheckNamedSubpath( + SubPath &subpath, vector> &column_list, + CreatePropertyGraphInfo &pg_table) { + for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { + FunctionExpression *parsed_ref = + dynamic_cast(column_list[idx_i].get()); + if (parsed_ref == nullptr) { + continue; + } + auto column_ref = + dynamic_cast(parsed_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + // Trying to check parsed_ref->alias directly leads to a segfault + string column_alias = parsed_ref->alias; + + if (column_ref->column_names[0] != subpath.path_variable) { + continue; + } + if (parsed_ref->function_name == "element_id") { + // Check subpath name matches the column referenced in the function --> + // element_id(named_subpath) + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + + if (column_alias.empty()) { + shortest_path_function->alias = + "element_id(" + subpath.path_variable + ")"; + } else { + shortest_path_function->alias = column_alias; + } + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, + std::move(shortest_path_function)); + } else if (parsed_ref->function_name == "path_length") { + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + auto path_len_children = vector>(); + path_len_children.push_back(std::move(shortest_path_function)); + auto path_len = + make_uniq("len", std::move(path_len_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto path_length_function = + make_uniq("//", std::move(div_children)); + path_length_function->alias = + column_alias.empty() ? 
"path_length(" + subpath.path_variable + ")" + : column_alias; + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, + std::move(path_length_function)); + } else if (parsed_ref->function_name == "vertices" || + parsed_ref->function_name == "edges") { + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + auto list_slice_children = vector>(); + list_slice_children.push_back(std::move(shortest_path_function)); + + if (parsed_ref->function_name == "vertices") { + list_slice_children.push_back( + make_uniq(Value::INTEGER(1))); + } else { + list_slice_children.push_back( + make_uniq(Value::INTEGER(2))); + } + auto slice_end = make_uniq(Value::INTEGER(-1)); + auto slice_step = make_uniq(Value::INTEGER(2)); + + list_slice_children.push_back(std::move(slice_end)); + list_slice_children.push_back(std::move(slice_step)); + auto list_slice = make_uniq( + "list_slice", std::move(list_slice_children)); + if (parsed_ref->function_name == "vertices") { + list_slice->alias = column_alias.empty() + ? "vertices(" + subpath.path_variable + ")" + : column_alias; + } else { + list_slice->alias = column_alias.empty() + ? 
"edges(" + subpath.path_variable + ")" + : column_alias; + } + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); + } + } +} + +void PGQMatchFunction::ProcessPathList( + vector> &path_list, + vector> &conditions, + unique_ptr &from_clause, unique_ptr &select_node, + unordered_map &alias_map, CreatePropertyGraphInfo &pg_table, + int32_t &extra_alias_counter, + vector> &column_list) { + PathElement *previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + const auto previous_vertex_subpath = + reinterpret_cast(path_list[0].get()); + CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); + if (previous_vertex_subpath->where_clause) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (previous_vertex_subpath->path_list.size() == 1) { + previous_vertex_element = + GetPathElement(previous_vertex_subpath->path_list[0]); + } else { + // Add the shortest path if the name is found in the column_list + ProcessPathList(previous_vertex_subpath->path_list, conditions, + from_clause, select_node, alias_map, pg_table, + extra_alias_counter, column_list); + return; + } + } + auto previous_vertex_table = + FindGraphTable(previous_vertex_element->label, pg_table); + CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); + alias_map[previous_vertex_element->variable_binding] = + previous_vertex_table->table_name; + + for (idx_t idx_j = 1; idx_j < path_list.size(); idx_j = idx_j + 2) { + PathElement *next_vertex_element = GetPathElement(path_list[idx_j + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_list[idx_j + 1].get()); + if (next_vertex_subpath->path_list.size() > 1) { + throw NotImplementedException( + "Recursive patterns are not yet supported."); + } + if (next_vertex_subpath->where_clause) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + 
next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + throw BinderException("Vertex and edge patterns must be alternated."); + } + auto next_vertex_table = + FindGraphTable(next_vertex_element->label, pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = + next_vertex_table->table_name; + + PathElement *edge_element = GetPathElement(path_list[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); + if (edge_subpath->where_clause) { + conditions.push_back(std::move(edge_subpath->where_clause)); + } + if (edge_subpath->path_list.size() > 1) { + throw NotImplementedException( + "Subpath on an edge is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + + if (edge_subpath->upper > 1) { + // Add the path-finding + AddPathFinding(select_node, from_clause, conditions, + previous_vertex_element->variable_binding, + edge_element->variable_binding, + next_vertex_element->variable_binding, edge_table, + edge_subpath); + } else { + alias_map[edge_element->variable_binding] = + edge_table->source_reference; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, + previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, + alias_map, extra_alias_counter); + } + } else { + // The edge element is a path element without WHERE or path-finding. 
+ auto edge_table = FindGraphTable(edge_element->label, pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + alias_map[edge_element->variable_binding] = edge_table->table_name; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, + previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, + extra_alias_counter); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR + // (b.dst = a.id AND b.src + // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) + } + previous_vertex_element = next_vertex_element; + previous_vertex_table = next_vertex_table; + } +} + +unique_ptr +PGQMatchFunction::MatchBindReplace(ClientContext &context, + TableFunctionBindInput &) { + auto duckpgq_state_entry = context.registered_state.find("duckpgq"); + auto duckpgq_state = + dynamic_cast(duckpgq_state_entry->second.get()); + + auto ref = dynamic_cast( + duckpgq_state->transform_expression.get()); + auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + + auto data = make_uniq(); + + vector> conditions; + + auto select_node = make_uniq(); + unordered_map alias_map; + unique_ptr from_clause; + + int32_t extra_alias_counter = 0; + for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { + auto &path_pattern = ref->path_patterns[idx_i]; + // Check if the element is PathElement or a Subpath with potentially many + // items + ProcessPathList(path_pattern->path_elements, conditions, from_clause, + select_node, alias_map, *pg_table, extra_alias_counter, + ref->column_list); + } + + // Go through all aliases encountered + for (auto &table_alias_entry : alias_map) { + auto table_ref = make_uniq(); + table_ref->table_name 
= table_alias_entry.second; + table_ref->alias = table_alias_entry.first; + + if (from_clause) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(from_clause); + new_root->right = std::move(table_ref); + from_clause = std::move(new_root); + } else { + from_clause = std::move(table_ref); + } + } + + select_node->from_table = std::move(from_clause); + + if (ref->where_clause) { + conditions.push_back(std::move(ref->where_clause)); + } + std::vector> final_column_list; + + for (auto &expression : ref->column_list) { + unordered_set named_subpaths; + auto column_ref = dynamic_cast(expression.get()); + if (column_ref != nullptr) { + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + final_column_list.emplace_back(make_uniq( + "path", column_ref->column_names[0])); + } else { + final_column_list.push_back(std::move(expression)); + } + continue; + } + auto function_ref = dynamic_cast(expression.get()); + if (function_ref != nullptr) { + if (function_ref->function_name == "path_length") { + column_ref = dynamic_cast( + function_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + auto path_ref = make_uniq( + "path", column_ref->column_names[0]); + vector> path_children; + path_children.push_back(std::move(path_ref)); + auto path_len = + make_uniq("len", std::move(path_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto div_expression = + make_uniq("//", std::move(div_children)); + div_expression->alias = "path_length_" + column_ref->column_names[0]; + final_column_list.emplace_back(std::move(div_expression)); + } + } else { + final_column_list.push_back(std::move(expression)); + } + + continue; + } + + 
final_column_list.push_back(std::move(expression)); + } + + select_node->where_clause = CreateWhereClause(conditions); + select_node->select_list = std::move(final_column_list); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery), ref->alias); + + return std::move(result); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp index d9ef7a7f..ba646aac 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp @@ -39,8 +39,8 @@ static void ScanCSREFunction(ClientContext &context, TableFunctionInput &data_p, FlatVector::SetData(output.data[0], (data_ptr_t)csr->e.data()); } -static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_p, - DataChunk &output) { +static void ScanCSRPtrFunction(ClientContext &context, + TableFunctionInput &data_p, DataChunk &output) { bool &gstate = ((CSRScanState &)*data_p.global_state).finished; if (gstate) { @@ -74,7 +74,7 @@ static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_ // the third element is the address of the weight array // the fifth element is the type of the weight array // 0 if the weights are integres, 1 if they are doubles, and 2 for unweighted - if(csr->w.size()) { + if (csr->w.size()) { result_data[2] = (uint64_t)(&(csr->w)); result_data[4] = (uint64_t)(0); } else if (csr->w_double.size()) { @@ -84,7 +84,8 @@ static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_ result_data[2] = (uint64_t)(0); result_data[4] = (uint64_t)(2); } - // we also need the number of elements in the vertex array, since its C-array not a vector. + // we also need the number of elements in the vertex array, since its C-array + // not a vector. 
result_data[3] = (uint64_t)(csr->vsize); } diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index ead1e2e6..1cadbe71 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -90,17 +90,17 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, SQLStatement &statement) { auto lookup = context.registered_state.find("duckpgq"); if (lookup == context.registered_state.end()) { - throw BinderException("Registered state not found"); - } - - auto duckpgq_state = (DuckPGQState *)lookup->second.get(); - auto duckpgq_binder = Binder::CreateBinder(context); - auto duckpgq_parse_data = - dynamic_cast(duckpgq_state->parse_data.get()); - if (duckpgq_parse_data) { - return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); - } - throw BinderException("Unable to find DuckPGQ Parse Data"); + throw BinderException("Registered state not found"); + } + + auto duckpgq_state = (DuckPGQState *)lookup->second.get(); + auto duckpgq_binder = Binder::CreateBinder(context); + auto duckpgq_parse_data = + dynamic_cast(duckpgq_state->parse_data.get()); + if (duckpgq_parse_data) { + return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); + } + throw BinderException("Unable to find DuckPGQ Parse Data"); } ParserExtensionPlanResult @@ -136,13 +136,15 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, function->children.pop_back(); } throw Exception("use duckpgq_bind instead"); - } if (statement->type == StatementType::CREATE_STATEMENT) { + } + if (statement->type == StatementType::CREATE_STATEMENT) { ParserExtensionPlanResult result; result.function = CreatePropertyGraphFunction(); result.requires_valid_transaction = true; result.return_type = StatementReturnType::QUERY_RESULT; return result; - } if (statement->type == StatementType::DROP_STATEMENT) { + } + if (statement->type == StatementType::DROP_STATEMENT) { ParserExtensionPlanResult result; result.function = DropPropertyGraphFunction(); 
result.requires_valid_transaction = true; From e5bff3bfed5eeddde876365a8722d8c63c0bdfb5 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:22:12 +0100 Subject: [PATCH 44/47] Format fix --- duckdb-pgq | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb-pgq b/duckdb-pgq index 82b4945b..bdbc6067 160000 --- a/duckdb-pgq +++ b/duckdb-pgq @@ -1 +1 @@ -Subproject commit 82b4945bf6ab44b15db8b95a7cac247f6842feb1 +Subproject commit bdbc6067198d19bf8d1455d1fb6aa534d0d2785e From e428913232408cfe04a05c00703c5f5c145c0f97 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:22:24 +0100 Subject: [PATCH 45/47] Version change github actions --- .github/workflows/Linux.yml | 2 +- .github/workflows/MacOS.yml | 2 +- .github/workflows/Windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 25393dee..82444adf 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: # Add commits/tags to build against other DuckDB versions - duckdb_version: [ 'v0.8.1' ] + duckdb_version: [ 'latest' ] arch: ['linux_amd64', 'linux_arm64', 'linux_amd64_gcc4'] include: - arch: 'linux_amd64' diff --git a/.github/workflows/MacOS.yml b/.github/workflows/MacOS.yml index b610fb91..50b7b9a8 100644 --- a/.github/workflows/MacOS.yml +++ b/.github/workflows/MacOS.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: # Add commits/tags to build against other DuckDB versions - duckdb_version: [ 'v0.8.1' ] + duckdb_version: [ 'latest' ] env: OSX_BUILD_UNIVERSAL: 1 diff --git a/.github/workflows/Windows.yml b/.github/workflows/Windows.yml index a0c17956..3fecc14a 100644 --- a/.github/workflows/Windows.yml +++ b/.github/workflows/Windows.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: # Add commits/tags to build against other DuckDB versions - duckdb_version: [ 'v0.8.1' ] + duckdb_version: [ 'latest' ] steps: - uses:
actions/checkout@v3 From 67494ceb42e5574b88367a6e5dc3c4713dfbca97 Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:24:32 +0100 Subject: [PATCH 46/47] Change file path --- .github/workflows/_extension_deploy.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/_extension_deploy.yml b/.github/workflows/_extension_deploy.yml index 53493dc1..686de0b1 100644 --- a/.github/workflows/_extension_deploy.yml +++ b/.github/workflows/_extension_deploy.yml @@ -47,7 +47,7 @@ on: matrix_parse_script: required: false type: string - default: "./duckdb/scripts/modify_distribution_matrix.py" + default: "./duckdb-pgq/scripts/modify_distribution_matrix.py" jobs: generate_matrix: @@ -63,13 +63,13 @@ jobs: - name: Checkout DuckDB to version run: | - cd duckdb + cd duckdb-pgq git checkout ${{ inputs.duckdb_version }} - id: parse-matrices run: | - cat ./duckdb/.github/config/distribution_matrix.json > distribution_matrix.json - grep wasm distribution_matrix.json || (head -n -1 ./duckdb/.github/config/distribution_matrix.json > distribution_matrix.json && echo ',"wasm":{"include":[{"duckdb_arch":"wasm_mvp","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_eh","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_threads","vcpkg_triplet":"wasm32-emscripten"}]}}' >> distribution_matrix.json) + cat ./duckdb-pgq/.github/config/distribution_matrix.json > distribution_matrix.json + grep wasm distribution_matrix.json || (head -n -1 ./duckdb-pgq/.github/config/distribution_matrix.json > distribution_matrix.json && echo ',"wasm":{"include":[{"duckdb_arch":"wasm_mvp","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_eh","vcpkg_triplet":"wasm32-emscripten"},{"duckdb_arch":"wasm_threads","vcpkg_triplet":"wasm32-emscripten"}]}}' >> distribution_matrix.json) python3 ${{ inputs.matrix_parse_script }} --input distribution_matrix.json --deploy_matrix --output deploy_matrix.json --exclude "${{ inputs.exclude_archs }}" 
--pretty deploy_matrix="`cat deploy_matrix.json`" echo deploy_matrix=$deploy_matrix >> $GITHUB_OUTPUT @@ -91,7 +91,7 @@ jobs: - name: Checkout DuckDB to version run: | - cd duckdb + cd duckdb-pgq git checkout ${{ inputs.duckdb_version }} - uses: actions/download-artifact@v2 @@ -112,7 +112,7 @@ jobs: pwd python3 -m pip install pip awscli git config --global --add safe.directory '*' - cd duckdb + cd duckdb-pgq git fetch --tags export DUCKDB_VERSION=`git tag --points-at HEAD` export DUCKDB_VERSION=${DUCKDB_VERSION:=`git log -1 --format=%h`} From 511d2cba0b7704a8c0205daf590f7d676d9f1a6f Mon Sep 17 00:00:00 2001 From: dtenwolde Date: Thu, 18 Jan 2024 14:30:02 +0100 Subject: [PATCH 47/47] Adding duckdb as submodule --- .github/workflows/MacOS.yml | 2 +- .gitmodules | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/MacOS.yml b/.github/workflows/MacOS.yml index 50b7b9a8..b652bb6e 100644 --- a/.github/workflows/MacOS.yml +++ b/.github/workflows/MacOS.yml @@ -39,7 +39,7 @@ jobs: cd duckdb-pgq git checkout ${{ matrix.duckdb_version }} - # Build extension + # Build extension - name: Build extension shell: bash run: | diff --git a/.gitmodules b/.gitmodules index 7e217514..2eae8b8d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,7 @@ path = duckdb-pgq url = git@github.com:cwida/duckdb-pgq.git branch = master +[submodule "duckdb"] + path = duckdb + url = git@github.com:duckdb/duckdb.git + branch = main