diff --git a/CMakeLists.txt b/CMakeLists.txt index 48cc12d4..97e93588 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,28 +1,25 @@ cmake_minimum_required(VERSION 2.8.12) -# Set extension name here -#<<<<<<< HEAD -#set(TARGET_NAME duckpgq) +# Set extension name here <<<<<<< HEAD set(TARGET_NAME duckpgq) # -#set(EXTENSION_NAME ${TARGET_NAME}_extension) -#project(${TARGET_NAME}) -#set(CMAKE_CXX_STANDARD 11) +# set(EXTENSION_NAME ${TARGET_NAME}_extension) project(${TARGET_NAME}) +# set(CMAKE_CXX_STANDARD 11) # -#include_directories(duckpgq/include) -#add_subdirectory(duckpgq/src) +# include_directories(duckpgq/include) add_subdirectory(duckpgq/src) # -#include_directories(../duckdb-pgq/third_party/libpg_query/include) +# include_directories(../duckdb-pgq/third_party/libpg_query/include) # -#add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) +# add_library(${EXTENSION_NAME} STATIC ${EXTENSION_SOURCES}) # -#set(PARAMETERS "-warnings") -#build_loadable_extension(${TARGET_NAME} ${PARAMETERS} ${EXTENSION_SOURCES}) -#======= +# set(PARAMETERS "-warnings") build_loadable_extension(${TARGET_NAME} +# ${PARAMETERS} ${EXTENSION_SOURCES}) +# ======= set(TARGET_NAME duckpgq) set(CMAKE_CXX_STANDARD 11) -# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then -# used in cmake with find_package. Feel free to remove or replace with other dependencies. -# Note that it should also be removed from vcpkg.json to prevent needlessly installing it.. +# DuckDB's extension distribution supports vcpkg. As such, dependencies can be +# added in ./vcpkg.json and then used in cmake with find_package. Feel free to +# remove or replace with other dependencies. Note that it should also be removed +# from vcpkg.json to prevent needlessly installing it.. 
find_package(OpenSSL REQUIRED) set(EXTENSION_NAME ${TARGET_NAME}_extension) @@ -33,7 +30,7 @@ include_directories(duckpgq/include) add_subdirectory(duckpgq/src) include_directories(../duckdb-pgq/third_party/libpg_query/include) -#set(EXTENSION_SOURCES duckpgq/src/duckpgq_extension.cpp) +# set(EXTENSION_SOURCES duckpgq/src/duckpgq_extension.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) @@ -41,7 +38,7 @@ build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link OpenSSL in both the static library as the loadable extension target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) -#>>>>>>> template/main +# >>>>>>> template/main install( TARGETS ${EXTENSION_NAME} diff --git a/Makefile b/Makefile index 98f1215a..8531f624 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ test_release_python: release_python #### Misc format: - find src/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i + find duckpgq/ -iname *.hpp -o -iname *.cpp | xargs clang-format --sort-includes=0 -style=file -i cmake-format -i CMakeLists.txt update: git submodule update --remote --merge diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp index c746d3d7..4144d22a 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/match.hpp @@ -16,126 +16,131 @@ #include "duckdb/parser/path_pattern.hpp" namespace duckdb { - struct PGQMatchFunction : public TableFunction { - public: - PGQMatchFunction() { - name = "duckpgq_match"; - bind_replace = MatchBindReplace; - } - - struct MatchBindData : public TableFunctionData { - bool done = false; - }; - - - static shared_ptr - FindGraphTable(const string& label, CreatePropertyGraphInfo& 
pg_table); - - static void - CheckInheritance(const shared_ptr& tableref, - PathElement* element, - vector>& conditions); - - static void - CheckEdgeTableConstraints(const string& src_reference, - const string& dst_reference, - const shared_ptr& edge_table); - - static unique_ptr CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, - const string& vertex_alias, const string& edge_alias); - - static PathElement* - GetPathElement(const unique_ptr& path_reference); - - static unique_ptr - GetCountTable(const shared_ptr& edge_table, - const string& prev_binding); - - static unique_ptr - GetJoinRef(const shared_ptr& edge_table, - const string& edge_binding, const string& prev_binding, - const string& next_binding); - - static unique_ptr CreateCountCTESubquery(); - - static unique_ptr - CreateCSRCTE(const shared_ptr& edge_table, - const string& edge_binding, const string& prev_binding, - const string& next_binding); - - static void EdgeTypeAny(const shared_ptr& edge_table, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeLeft(const shared_ptr& edge_table, - const string& next_table_name, - const string& prev_table_name, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeRight(const shared_ptr& edge_table, - const string& next_table_name, - const string& prev_table_name, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions); - - static void EdgeTypeLeftRight( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions, - unordered_map& alias_map, int32_t& extra_alias_counter); - - static PathElement* - HandleNestedSubPath(unique_ptr& path_reference, - vector>& conditions, - idx_t element_idx); - - static unique_ptr MatchBindReplace(ClientContext& 
context, - TableFunctionBindInput& input); - - static unique_ptr GenerateSubpathPatternSubquery( - unique_ptr& path_pattern, CreatePropertyGraphInfo* pg_table, - vector>& column_list, - unordered_set& named_subpaths); - - static unique_ptr - CreatePathFindingFunction(vector> &path_list, CreatePropertyGraphInfo &pg_table); - - - static void AddPathFinding(const unique_ptr& select_node, - unique_ptr& from_clause, - vector>& conditions, - const string& prev_binding, const string& edge_binding, const string& next_binding, - const shared_ptr& edge_table, - const SubPath* subpath); - - static void AddEdgeJoins(const unique_ptr& select_node, - const shared_ptr& edge_table, - const shared_ptr& previous_vertex_table, - const shared_ptr& next_vertex_table, - PGQMatchType edge_type, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions, - unordered_map& alias_map, - int32_t& extra_alias_counter); - - static void ProcessPathList(vector>& path_pattern, - vector>& conditions, - unique_ptr& from_clause, unique_ptr& select_node, - unordered_map& alias_map, - CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, - vector>& column_list); - - static void CheckNamedSubpath(SubPath &subpath, vector>& column_list, - CreatePropertyGraphInfo &pg_table); - }; +struct PGQMatchFunction : public TableFunction { +public: + PGQMatchFunction() { + name = "duckpgq_match"; + bind_replace = MatchBindReplace; + } + + struct MatchBindData : public TableFunctionData { + bool done = false; + }; + + static shared_ptr + FindGraphTable(const string &label, CreatePropertyGraphInfo &pg_table); + + static void + CheckInheritance(const shared_ptr &tableref, + PathElement *element, + vector> &conditions); + + static void + CheckEdgeTableConstraints(const string &src_reference, + const string &dst_reference, + const shared_ptr &edge_table); + + static unique_ptr CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string 
&vertex_alias, const string &edge_alias); + + static PathElement * + GetPathElement(const unique_ptr &path_reference); + + static unique_ptr + GetCountTable(const shared_ptr &edge_table, + const string &prev_binding); + + static unique_ptr + GetJoinRef(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding); + + static unique_ptr CreateCountCTESubquery(); + + static unique_ptr + CreateCSRCTE(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding); + + static void EdgeTypeAny(const shared_ptr &edge_table, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void EdgeTypeLeft(const shared_ptr &edge_table, + const string &next_table_name, + const string &prev_table_name, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void EdgeTypeRight(const shared_ptr &edge_table, + const string &next_table_name, + const string &prev_table_name, + const string &edge_binding, + const string &prev_binding, + const string &next_binding, + vector> &conditions); + + static void + EdgeTypeLeftRight(const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter); + + static PathElement * + HandleNestedSubPath(unique_ptr &path_reference, + vector> &conditions, + idx_t element_idx); + + static unique_ptr MatchBindReplace(ClientContext &context, + TableFunctionBindInput &input); + + static unique_ptr GenerateSubpathPatternSubquery( + unique_ptr &path_pattern, CreatePropertyGraphInfo *pg_table, + vector> &column_list, + unordered_set &named_subpaths); + + static unique_ptr + CreatePathFindingFunction(vector> &path_list, + CreatePropertyGraphInfo &pg_table); + + static void 
AddPathFinding(const unique_ptr &select_node, + unique_ptr &from_clause, + vector> &conditions, + const string &prev_binding, + const string &edge_binding, + const string &next_binding, + const shared_ptr &edge_table, + const SubPath *subpath); + + static void + AddEdgeJoins(const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + unordered_map &alias_map, + int32_t &extra_alias_counter); + + static void ProcessPathList( + vector> &path_pattern, + vector> &conditions, + unique_ptr &from_clause, unique_ptr &select_node, + unordered_map &alias_map, + CreatePropertyGraphInfo &pg_table, int32_t &extra_alias_counter, + vector> &column_list); + + static void + CheckNamedSubpath(SubPath &subpath, + vector> &column_list, + CreatePropertyGraphInfo &pg_table); +}; } // namespace duckdb diff --git a/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp b/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp index d9f466ff..89f2a2ee 100644 --- a/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp +++ b/duckpgq/include/duckpgq/functions/tablefunctions/pgq_scan.hpp @@ -33,13 +33,13 @@ struct CSRScanPtrData : public TableFunctionData { public: static unique_ptr ScanCSRPtrBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { + vector &return_types, vector &names) { auto result = make_uniq(); result->csr_id = input.inputs[0].GetValue(); return_types.emplace_back(LogicalType::UBIGINT); names.emplace_back("ptr"); return std::move(result); - } + } public: int32_t csr_id; diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp index 6c219f19..f84fc1af 100644 --- 
a/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/create_property_graph.cpp @@ -94,9 +94,9 @@ CreatePropertyGraphFunction::CreatePropertyGraphBind( CheckPropertyGraphTableLabels(vertex_table, table); v_table_names.insert(vertex_table->table_name); - if (vertex_table->hasTableNameAlias()) { - v_table_names.insert(vertex_table->table_name_alias); - } + if (vertex_table->hasTableNameAlias()) { + v_table_names.insert(vertex_table->table_name_alias); + } } for (auto &edge_table : info->edge_tables) { diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp index 860f1f79..61fcec74 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/match.cpp @@ -24,951 +24,987 @@ #include - namespace duckdb { - shared_ptr - PGQMatchFunction::FindGraphTable(const string& label, - CreatePropertyGraphInfo& pg_table) { - const auto graph_table_entry = pg_table.label_map.find(label); - if (graph_table_entry == pg_table.label_map.end()) { - throw BinderException("The label %s is not registered in property graph %s", - label, pg_table.property_graph_name); - } - - return graph_table_entry->second; - } - - void PGQMatchFunction::CheckInheritance( - const shared_ptr& tableref, PathElement* element, - vector>& conditions) { - if (tableref->main_label == element->label) { - return; - } - auto constant_expression_two = - make_uniq(Value::INTEGER((int32_t) 2)); - const auto itr = std::find(tableref->sub_labels.begin(), tableref->sub_labels.end(), - element->label); - - const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); - auto constant_expression_idx_label = - make_uniq(Value::INTEGER(static_cast(idx_of_element))); - - vector> power_of_children; - power_of_children.push_back(std::move(constant_expression_two)); - 
power_of_children.push_back(std::move(constant_expression_idx_label)); - auto power_of_term = - make_uniq("power", std::move(power_of_children)); - auto bigint_cast = - make_uniq(LogicalType::BIGINT, std::move(power_of_term)); - auto subcategory_colref = make_uniq( - tableref->discriminator, element->variable_binding); - vector> and_children; - and_children.push_back(std::move(subcategory_colref)); - and_children.push_back(std::move(bigint_cast)); - - auto and_expression = - make_uniq("&", std::move(and_children)); - - auto constant_expression_idx_label_comparison = make_uniq( - Value::INTEGER(static_cast(idx_of_element + 1))); - - auto subset_compare = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(and_expression), - std::move(constant_expression_idx_label_comparison)); - conditions.push_back(std::move(subset_compare)); - } - - void PGQMatchFunction::CheckEdgeTableConstraints( - const string& src_reference, const string& dst_reference, - const shared_ptr& edge_table) { - if (src_reference != edge_table->source_reference) { - throw BinderException("Label %s is not registered as a source reference " - "for edge pattern of table %s", - src_reference, edge_table->table_name); - } - if (dst_reference != edge_table->destination_reference) { - throw BinderException("Label %s is not registered as a destination " - "reference for edge pattern of table %s", - src_reference, edge_table->table_name); - } - } - - unique_ptr PGQMatchFunction::CreateMatchJoinExpression( - vector vertex_keys, vector edge_keys, - const string& vertex_alias, const string& edge_alias) { - vector> conditions; - - if (vertex_keys.size() != edge_keys.size()) { - throw BinderException("Vertex columns and edge columns size mismatch"); - } - for (idx_t i = 0; i < vertex_keys.size(); i++) { - auto vertex_colref = - make_uniq(vertex_keys[i], vertex_alias); - auto edge_colref = make_uniq(edge_keys[i], edge_alias); - conditions.push_back(make_uniq( - ExpressionType::COMPARE_EQUAL, 
std::move(vertex_colref), - std::move(edge_colref))); - } - unique_ptr where_clause; - - for (auto& condition: conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - - return where_clause; - } - - PathElement* PGQMatchFunction::GetPathElement( - const unique_ptr& path_reference) { - if (path_reference->path_reference_type == - PGQPathReferenceType::PATH_ELEMENT) { - return reinterpret_cast(path_reference.get()); - } - if (path_reference->path_reference_type == - PGQPathReferenceType::SUBPATH) { - return nullptr; - } - throw InternalException("Unknown path reference type detected"); - } - - unique_ptr - PGQMatchFunction::GetCountTable(const shared_ptr& edge_table, - const string& prev_binding) { - // SELECT count(s.id) FROM src s - auto select_count = make_uniq(); - auto select_inner = make_uniq(); - auto ref = make_uniq(); - - ref->table_name = edge_table->source_reference; - ref->alias = prev_binding; - select_inner->from_table = std::move(ref); - vector> children; - children.push_back( - make_uniq(edge_table->source_pk[0], prev_binding)); - - auto count_function = - make_uniq("count", std::move(children)); - select_inner->select_list.push_back(std::move(count_function)); - select_count->node = std::move(select_inner); - auto result = make_uniq(); - result->subquery = std::move(select_count); - result->subquery_type = SubqueryType::SCALAR; - return result; - } - - unique_ptr - PGQMatchFunction::GetJoinRef(const shared_ptr& edge_table, - const string& edge_binding, - const string& prev_binding, - const string& next_binding) { - auto first_join_ref = make_uniq(JoinRefType::REGULAR); - first_join_ref->type = JoinType::INNER; - - auto second_join_ref = make_uniq(JoinRefType::REGULAR); - second_join_ref->type = JoinType::INNER; - - auto edge_base_ref = make_uniq(); - edge_base_ref->table_name = edge_table->table_name; 
- edge_base_ref->alias = edge_binding; - auto src_base_ref = make_uniq(); - src_base_ref->table_name = edge_table->source_reference; - src_base_ref->alias = prev_binding; - second_join_ref->left = std::move(edge_base_ref); - second_join_ref->right = std::move(src_base_ref); - auto t_from_ref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto src_cid_ref = - make_uniq(edge_table->source_pk[0], prev_binding); - second_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), - std::move(src_cid_ref)); - auto dst_base_ref = make_uniq(); - dst_base_ref->table_name = edge_table->destination_reference; - dst_base_ref->alias = next_binding; - first_join_ref->left = std::move(second_join_ref); - first_join_ref->right = std::move(dst_base_ref); - - auto t_to_ref = make_uniq(edge_table->destination_fk[0], - edge_binding); - auto dst_cid_ref = make_uniq( - edge_table->destination_pk[0], next_binding); - first_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), - std::move(dst_cid_ref)); - return first_join_ref; - } - - unique_ptr PGQMatchFunction::CreateCountCTESubquery() { - //! 
BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x - - auto temp_cte_select_node = make_uniq(); - - auto cte_table_ref = make_uniq(); - - cte_table_ref->table_name = "cte1"; - temp_cte_select_node->from_table = std::move(cte_table_ref); - vector> children; - children.push_back(make_uniq("temp", "cte1")); - - auto count_function = - make_uniq("count", std::move(children)); - - auto zero = make_uniq(Value::INTEGER((int32_t) 0)); - - vector> multiply_children; - - multiply_children.push_back(std::move(zero)); - multiply_children.push_back(std::move(count_function)); - auto multiply_function = - make_uniq("multiply", std::move(multiply_children)); - multiply_function->alias = "temp"; - temp_cte_select_node->select_list.push_back(std::move(multiply_function)); - auto temp_cte_select_statement = make_uniq(); - temp_cte_select_statement->node = std::move(temp_cte_select_node); - - auto temp_cte_select_subquery = - make_uniq(std::move(temp_cte_select_statement), "__x"); - //! END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x - return temp_cte_select_subquery; - } - - unique_ptr - PGQMatchFunction::CreateCSRCTE(const shared_ptr& edge_table, - const string& prev_binding, - const string& edge_binding, - const string& next_binding) { - auto csr_edge_id_constant = - make_uniq(Value::INTEGER(0)); - auto count_create_edge_select = GetCountTable(edge_table, prev_binding); - - auto cast_subquery_expr = make_uniq(); - auto cast_select_node = make_uniq(); - - vector> csr_vertex_children; - csr_vertex_children.push_back( - make_uniq(Value::INTEGER(0))); - - auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); - - csr_vertex_children.push_back(std::move(count_create_vertex_expr)); - - csr_vertex_children.push_back( - make_uniq("dense_id", "sub")); - csr_vertex_children.push_back(make_uniq("cnt", "sub")); - - auto create_vertex_function = make_uniq( - "create_csr_vertex", std::move(csr_vertex_children)); - vector> sum_children; - 
sum_children.push_back(std::move(create_vertex_function)); - auto sum_function = - make_uniq("sum", std::move(sum_children)); - - auto inner_select_statement = make_uniq(); - auto inner_select_node = make_uniq(); - - auto source_rowid_colref = - make_uniq("rowid", prev_binding); - source_rowid_colref->alias = "dense_id"; - - auto count_create_inner_expr = make_uniq(); - count_create_inner_expr->subquery_type = SubqueryType::SCALAR; - auto edge_src_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - vector> inner_count_children; - inner_count_children.push_back(std::move(edge_src_colref)); - auto inner_count_function = - make_uniq("count", std::move(inner_count_children)); - inner_count_function->alias = "cnt"; - - inner_select_node->select_list.push_back(std::move(source_rowid_colref)); - inner_select_node->select_list.push_back(std::move(inner_count_function)); - auto source_rowid_colref_1 = - make_uniq("rowid", prev_binding); - expression_map_t grouping_expression_map; - inner_select_node->groups.group_expressions.push_back( - std::move(source_rowid_colref_1)); - GroupingSet grouping_set = {0}; - inner_select_node->groups.grouping_sets.push_back(grouping_set); - - auto inner_join_ref = make_uniq(JoinRefType::REGULAR); - inner_join_ref->type = JoinType::LEFT; - auto left_base_ref = make_uniq(); - left_base_ref->table_name = edge_table->source_reference; - left_base_ref->alias = prev_binding; - auto right_base_ref = make_uniq(); - right_base_ref->table_name = edge_table->table_name; - right_base_ref->alias = edge_binding; - inner_join_ref->left = std::move(left_base_ref); - inner_join_ref->right = std::move(right_base_ref); - - auto edge_join_colref = - make_uniq(edge_table->source_fk[0], edge_binding); - auto vertex_join_colref = - make_uniq(edge_table->source_pk[0], prev_binding); - - inner_join_ref->condition = make_uniq( - ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), - std::move(vertex_join_colref)); - inner_select_node->from_table = 
std::move(inner_join_ref); - inner_select_statement->node = std::move(inner_select_node); - - auto inner_from_subquery = - make_uniq(std::move(inner_select_statement), "sub"); - - cast_select_node->from_table = std::move(inner_from_subquery); - - cast_select_node->select_list.push_back(std::move(sum_function)); - auto cast_select_stmt = make_uniq(); - cast_select_stmt->node = std::move(cast_select_node); - cast_subquery_expr->subquery = std::move(cast_select_stmt); - cast_subquery_expr->subquery_type = SubqueryType::SCALAR; - - auto src_rowid_colref = make_uniq("rowid", prev_binding); - auto dst_rowid_colref = make_uniq("rowid", next_binding); - auto edge_rowid_colref = - make_uniq("rowid", edge_binding); - - auto cast_expression = make_uniq( - LogicalType::BIGINT, std::move(cast_subquery_expr)); - - vector> csr_edge_children; - csr_edge_children.push_back(std::move(csr_edge_id_constant)); - csr_edge_children.push_back(std::move(count_create_edge_select)); - csr_edge_children.push_back(std::move(cast_expression)); - csr_edge_children.push_back(std::move(src_rowid_colref)); - csr_edge_children.push_back(std::move(dst_rowid_colref)); - csr_edge_children.push_back(std::move(edge_rowid_colref)); - - auto outer_select_node = make_uniq(); - - auto create_csr_edge_function = make_uniq( - "create_csr_edge", std::move(csr_edge_children)); - create_csr_edge_function->alias = "temp"; - - outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); - outer_select_node->from_table = - GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); - auto outer_select_statement = make_uniq(); - - outer_select_statement->node = std::move(outer_select_node); - auto info = make_uniq(); - info->query = std::move(outer_select_statement); - return info; - } - - void PGQMatchFunction::EdgeTypeAny( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - // (a) src.key = edge.src 
- auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, - prev_binding, edge_binding); - // (b) dst.key = edge.dst - auto dst_left_expr = CreateMatchJoinExpression( - edge_table->destination_pk, edge_table->destination_fk, - next_binding, edge_binding); - // (a) AND (b) - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - // (c) src.key = edge.dst - auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - // (d) dst.key = edge.src - auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->source_fk, - next_binding, edge_binding); - // (c) AND (d) - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - // ((a) AND (b)) OR ((c) AND (d)) - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); - } - - void PGQMatchFunction::EdgeTypeLeft( - const shared_ptr& edge_table, const string& next_table_name, - const string& prev_table_name, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - next_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding)); - } - - void PGQMatchFunction::EdgeTypeRight( - const shared_ptr& edge_table, const string& next_table_name, - const string& prev_table_name, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions) { - 
CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); - conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, - edge_table->source_fk, - prev_binding, edge_binding)); - conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - next_binding, edge_binding)); - } - - void PGQMatchFunction::EdgeTypeLeftRight( - const shared_ptr& edge_table, const string& edge_binding, - const string& prev_binding, const string& next_binding, - vector>& conditions, - unordered_map& alias_map, int32_t& extra_alias_counter) { - auto src_left_expr = CreateMatchJoinExpression( - edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); - auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, - edge_table->destination_fk, - prev_binding, edge_binding); - - auto combined_left_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), - std::move(dst_left_expr)); - - const auto additional_edge_alias = - edge_binding + std::to_string(extra_alias_counter); - extra_alias_counter++; - - alias_map[additional_edge_alias] = edge_table->table_name; - - auto src_right_expr = - CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, - prev_binding, additional_edge_alias); - auto dst_right_expr = CreateMatchJoinExpression( - edge_table->destination_pk, edge_table->destination_fk, next_binding, - additional_edge_alias); - auto combined_right_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), - std::move(dst_right_expr)); - - auto combined_expr = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), - std::move(combined_right_expr)); - conditions.push_back(std::move(combined_expr)); - } - - PathElement* PGQMatchFunction::HandleNestedSubPath( - unique_ptr& path_reference, - vector>& conditions, idx_t element_idx) { - auto subpath = reinterpret_cast(path_reference.get()); - return 
GetPathElement(subpath->path_list[element_idx]); - } - - unique_ptr - CreateWhereClause(vector>& conditions) { - unique_ptr where_clause; - for (auto& condition: conditions) { - if (where_clause) { - where_clause = make_uniq( - ExpressionType::CONJUNCTION_AND, std::move(where_clause), - std::move(condition)); - } else { - where_clause = std::move(condition); - } - } - return where_clause; - } - - unique_ptr PGQMatchFunction::CreatePathFindingFunction( - vector>& path_list, CreatePropertyGraphInfo& pg_table) { - // This method will return a SubqueryRef of a list of rowids - // For every vertex and edge element, we add the rowid to the list using list_append, or list_prepend - // The difficulty is that there may be a (un)bounded path pattern at some point in the query - // This is computed using the shortestpath() UDF and returns a list. - // This list will be part of the full list of element rowids, using list_concat. - // For now we will only support returning rowids - unique_ptr final_list; - - auto previous_vertex_element = GetPathElement(path_list[0]); - if (!previous_vertex_element) { - // We hit a vertex element with a WHERE, but we only care about the rowid here - auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); - previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); - } - - for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { - auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = reinterpret_cast(path_list[idx_i + 1].get()); - next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); - } - - auto edge_element = GetPathElement(path_list[idx_i]); - if (!edge_element) { - auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); - if (edge_subpath->upper > 1) { - // (un)bounded shortest path - // Add the shortest path UDF - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = 
FindGraphTable(edge_element->label, pg_table); - auto src_row_id = make_uniq("rowid", previous_vertex_element->variable_binding); - auto dst_row_id = make_uniq("rowid", next_vertex_element->variable_binding); - auto csr_id = make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back( - std::move(GetCountTable(edge_table, previous_vertex_element->variable_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto shortest_path_function = make_uniq("shortestpath", - std::move(pathfinding_children)); - - if (!final_list) { - final_list = std::move(shortest_path_function); - } else { - auto pop_front_shortest_path_children = vector>(); - pop_front_shortest_path_children.push_back(std::move(shortest_path_function)); - auto pop_front = make_uniq("array_pop_front", - std::move(pop_front_shortest_path_children)); - - auto final_list_children = vector>(); - final_list_children.push_back(std::move(final_list)); - final_list_children.push_back(std::move(pop_front)); - final_list = make_uniq("list_concat", std::move(final_list_children)); - } - // Set next vertex to be previous - previous_vertex_element = next_vertex_element; - continue; - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - } - auto previous_rowid = make_uniq("rowid", previous_vertex_element->variable_binding); - auto edge_rowid = make_uniq("rowid", edge_element->variable_binding); - auto next_rowid = make_uniq("rowid", next_vertex_element->variable_binding); - auto starting_list_children = vector>(); - - if (!final_list) { - starting_list_children.push_back(std::move(previous_rowid)); - starting_list_children.push_back(std::move(edge_rowid)); - starting_list_children.push_back(std::move(next_rowid)); - final_list = make_uniq("list_value", std::move(starting_list_children)); - } else { - 
starting_list_children.push_back(std::move(edge_rowid)); - starting_list_children.push_back(std::move(next_rowid)); - auto next_elements_list = make_uniq("list_value", std::move(starting_list_children)); - auto final_list_children = vector>(); - final_list_children.push_back(std::move(final_list)); - final_list_children.push_back(std::move(next_elements_list)); - final_list = make_uniq("list_concat", std::move(final_list_children)); - } - previous_vertex_element = next_vertex_element; - } - - return final_list; - } - - void PGQMatchFunction::AddEdgeJoins(const unique_ptr& select_node, - const shared_ptr& edge_table, - const shared_ptr& previous_vertex_table, - const shared_ptr& next_vertex_table, - PGQMatchType edge_type, - const string& edge_binding, - const string& prev_binding, - const string& next_binding, - vector>& conditions, - unordered_map& alias_map, - int32_t& extra_alias_counter) { - switch (edge_type) { - case PGQMatchType::MATCH_EDGE_ANY: { - select_node->modifiers.push_back(make_uniq()); - EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, conditions); - break; - } - case PGQMatchType::MATCH_EDGE_LEFT: - EdgeTypeLeft(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_RIGHT: - EdgeTypeRight(edge_table, next_vertex_table->table_name, - previous_vertex_table->table_name, - edge_binding, prev_binding, next_binding, conditions); - break; - case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { - EdgeTypeLeftRight(edge_table, edge_binding, - prev_binding, next_binding, conditions, - alias_map, extra_alias_counter); - break; - } - default: - throw InternalException("Unknown match type found"); - } - } - - void PGQMatchFunction::AddPathFinding(const unique_ptr& select_node, - unique_ptr& from_clause, - vector>& conditions, - const string& prev_binding, const string& edge_binding, - const string& next_binding, - const shared_ptr& 
edge_table, - const SubPath* subpath) { - //! START - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - select_node->cte_map.map["cte1"] = CreateCSRCTE( - edge_table, prev_binding, - edge_binding, - next_binding); - - auto temp_cte_select_subquery = CreateCountCTESubquery(); - - if (from_clause) { - // create a cross join since there is already something in the - // from clause - auto from_join = make_uniq(JoinRefType::CROSS); - from_join->left = std::move(from_clause); - from_join->right = std::move(temp_cte_select_subquery); - from_clause = std::move(from_join); - } else { - from_clause = std::move(temp_cte_select_subquery); - } - //! END - //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x - - //! START - //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) - //! from dst c, a.rowid, b.rowid) between lower and upper - - auto src_row_id = make_uniq( - "rowid", prev_binding); - auto dst_row_id = make_uniq( - "rowid", next_binding); - auto csr_id = - make_uniq(Value::INTEGER(0)); - - vector> pathfinding_children; - pathfinding_children.push_back(std::move(csr_id)); - pathfinding_children.push_back(std::move(GetCountTable( - edge_table, prev_binding))); - pathfinding_children.push_back(std::move(src_row_id)); - pathfinding_children.push_back(std::move(dst_row_id)); - - auto reachability_function = make_uniq( - "iterativelength", std::move(pathfinding_children)); - - auto cte_col_ref = make_uniq("temp", "__x"); - - vector> addition_children; - addition_children.push_back(std::move(cte_col_ref)); - addition_children.push_back(std::move(reachability_function)); - - auto addition_function = make_uniq( - "add", std::move(addition_children)); - auto lower_limit = - make_uniq(Value::INTEGER(static_cast(subpath->lower))); - auto upper_limit = - make_uniq(Value::INTEGER(static_cast(subpath->upper))); - auto between_expression = make_uniq( - std::move(addition_function), std::move(lower_limit), - std::move(upper_limit)); - 
conditions.push_back(std::move(between_expression)); - - //! END - //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) - //! from src s, a.rowid, b.rowid) between lower and upper - } - - void PGQMatchFunction::CheckNamedSubpath(SubPath& subpath, vector>& column_list, - CreatePropertyGraphInfo& pg_table) { - for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { - FunctionExpression* parsed_ref = dynamic_cast(column_list[idx_i].get()); - if (parsed_ref == nullptr) { - continue; - } - auto column_ref = dynamic_cast(parsed_ref->children[0].get()); - if (column_ref == nullptr) { - continue; - } - // Trying to check parsed_ref->alias directly leads to a segfault - string column_alias = parsed_ref->alias; - - if (column_ref->column_names[0] != subpath.path_variable) { - continue; - } - if (parsed_ref->function_name == "element_id") { - // Check subpath name matches the column referenced in the function --> element_id(named_subpath) - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - - if (column_alias.empty()) { - shortest_path_function->alias = "element_id(" + subpath.path_variable + ")"; - } else { - shortest_path_function->alias = column_alias; - } - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(shortest_path_function)); - } else if (parsed_ref->function_name == "path_length") { - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - auto path_len_children = vector>(); - path_len_children.push_back(std::move(shortest_path_function)); - auto path_len = - make_uniq("len", std::move(path_len_children)); - auto constant_two = make_uniq(Value::INTEGER(2)); - vector> div_children; - div_children.push_back(std::move(path_len)); - div_children.push_back(std::move(constant_two)); - auto path_length_function = - make_uniq("//", std::move(div_children)); - path_length_function->alias = column_alias.empty() ? 
"path_length(" + subpath.path_variable + ")" : column_alias; - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(path_length_function)); - } else if (parsed_ref->function_name == "vertices" || parsed_ref->function_name == "edges") { - auto shortest_path_function = CreatePathFindingFunction(subpath.path_list, pg_table); - auto list_slice_children = vector>(); - list_slice_children.push_back(std::move(shortest_path_function)); - - if (parsed_ref->function_name == "vertices") { - list_slice_children.push_back(make_uniq(Value::INTEGER(1))); - } else { - list_slice_children.push_back(make_uniq(Value::INTEGER(2))); - } - auto slice_end = make_uniq(Value::INTEGER(-1)); - auto slice_step = make_uniq(Value::INTEGER(2)); - - list_slice_children.push_back(std::move(slice_end)); - list_slice_children.push_back(std::move(slice_step)); - auto list_slice = - make_uniq("list_slice", std::move(list_slice_children)); - if (parsed_ref->function_name == "vertices") { - list_slice->alias = column_alias.empty() ? "vertices(" + subpath.path_variable + ")" : column_alias; - } else { - list_slice->alias = column_alias.empty() ? 
"edges(" + subpath.path_variable + ")" : column_alias; - } - column_list.erase(column_list.begin() + idx_i); - column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); - } - } - } - - void PGQMatchFunction::ProcessPathList(vector>& path_list, - vector>& conditions, - unique_ptr& from_clause, unique_ptr& select_node, - unordered_map& alias_map, - CreatePropertyGraphInfo& pg_table, int32_t& extra_alias_counter, - vector>& column_list) { - PathElement* previous_vertex_element = - GetPathElement(path_list[0]); - if (!previous_vertex_element) { - const auto previous_vertex_subpath = reinterpret_cast(path_list[0].get()); - CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); - if (previous_vertex_subpath->where_clause) { - conditions.push_back(std::move(previous_vertex_subpath->where_clause)); - } - if (previous_vertex_subpath->path_list.size() == 1) { - previous_vertex_element = GetPathElement(previous_vertex_subpath->path_list[0]); - } else { - // Add the shortest path if the name is found in the column_list - ProcessPathList(previous_vertex_subpath->path_list, conditions, from_clause, select_node, - alias_map, pg_table, extra_alias_counter, column_list); - return; - } - } - auto previous_vertex_table = - FindGraphTable(previous_vertex_element->label, pg_table); - CheckInheritance(previous_vertex_table, previous_vertex_element, - conditions); - alias_map[previous_vertex_element->variable_binding] = - previous_vertex_table->table_name; - - for (idx_t idx_j = 1; - idx_j < path_list.size(); - idx_j = idx_j + 2) { - PathElement* next_vertex_element = - GetPathElement(path_list[idx_j + 1]); - if (!next_vertex_element) { - auto next_vertex_subpath = - reinterpret_cast(path_list[idx_j + 1].get()); - if (next_vertex_subpath->path_list.size() > 1) { - throw NotImplementedException("Recursive patterns are not yet supported."); - } - if (next_vertex_subpath->where_clause) { - conditions.push_back(std::move(next_vertex_subpath->where_clause)); - } - 
next_vertex_element = - GetPathElement(next_vertex_subpath->path_list[0]); - } - if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || - previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { - throw BinderException("Vertex and edge patterns must be alternated."); - } - auto next_vertex_table = - FindGraphTable(next_vertex_element->label, pg_table); - CheckInheritance(next_vertex_table, next_vertex_element, conditions); - alias_map[next_vertex_element->variable_binding] = next_vertex_table->table_name; - - PathElement* edge_element = - GetPathElement(path_list[idx_j]); - if (!edge_element) { - // We are dealing with a subpath - auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); - if (edge_subpath->where_clause) { - conditions.push_back(std::move(edge_subpath->where_clause)); - } - if (edge_subpath->path_list.size() > 1) { - throw NotImplementedException("Subpath on an edge is not yet supported."); - } - edge_element = GetPathElement(edge_subpath->path_list[0]); - auto edge_table = FindGraphTable(edge_element->label, pg_table); - - if (edge_subpath->upper > 1) { - // Add the path-finding - AddPathFinding(select_node, from_clause, conditions, - previous_vertex_element->variable_binding, - edge_element->variable_binding, - next_vertex_element->variable_binding, - edge_table, edge_subpath); - } else { - alias_map[edge_element->variable_binding] = edge_table->source_reference; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, - edge_element->variable_binding, previous_vertex_element->variable_binding, - next_vertex_element->variable_binding, conditions, alias_map, extra_alias_counter); - } - } else { - // The edge element is a path element without WHERE or path-finding. 
- auto edge_table = FindGraphTable(edge_element->label, pg_table); - CheckInheritance(edge_table, edge_element, conditions); - // check aliases - alias_map[edge_element->variable_binding] = edge_table->table_name; - AddEdgeJoins(select_node, edge_table, previous_vertex_table, - next_vertex_table, edge_element->match_type, edge_element->variable_binding, - previous_vertex_element->variable_binding, next_vertex_element->variable_binding, - conditions, alias_map, extra_alias_counter); - // Check the edge type - // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id - // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id - // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR - // (b.dst = a.id AND b.src - // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND - // (b.dst = a.id AND b.src - //= c.id) - } - previous_vertex_element = next_vertex_element; - previous_vertex_table = next_vertex_table; - } - } - - - unique_ptr PGQMatchFunction::MatchBindReplace(ClientContext& context, - TableFunctionBindInput&) { - auto duckpgq_state_entry = context.registered_state.find("duckpgq"); - auto duckpgq_state = dynamic_cast(duckpgq_state_entry->second.get()); - - auto ref = dynamic_cast( - duckpgq_state->transform_expression.get()); - auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); - - auto data = make_uniq(); - - vector> conditions; - - auto select_node = make_uniq(); - unordered_map alias_map; - unique_ptr from_clause; - - int32_t extra_alias_counter = 0; - for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { - auto& path_pattern = ref->path_patterns[idx_i]; - // Check if the element is PathElement or a Subpath with potentially many items - ProcessPathList(path_pattern->path_elements, conditions, from_clause, select_node, - alias_map, *pg_table, extra_alias_counter, ref->column_list); - } - - // Go through all aliases encountered - for (auto& table_alias_entry: alias_map) { - auto table_ref = make_uniq(); - table_ref->table_name = 
table_alias_entry.second; - table_ref->alias = table_alias_entry.first; - - if (from_clause) { - auto new_root = make_uniq(JoinRefType::CROSS); - new_root->left = std::move(from_clause); - new_root->right = std::move(table_ref); - from_clause = std::move(new_root); - } else { - from_clause = std::move(table_ref); - } - } - - select_node->from_table = std::move(from_clause); - - if (ref->where_clause) { - conditions.push_back(std::move(ref->where_clause)); - } - std::vector> final_column_list; - - for (auto& expression: ref->column_list) { - unordered_set named_subpaths; - auto column_ref = dynamic_cast(expression.get()); - if (column_ref != nullptr) { - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - final_column_list.emplace_back(make_uniq( - "path", column_ref->column_names[0])); - } else { - final_column_list.push_back(std::move(expression)); - } - continue; - } - auto function_ref = dynamic_cast(expression.get()); - if (function_ref != nullptr) { - if (function_ref->function_name == "path_length") { - column_ref = dynamic_cast( - function_ref->children[0].get()); - if (column_ref == nullptr) { - continue; - } - if (named_subpaths.count(column_ref->column_names[0]) && - column_ref->column_names.size() == 1) { - auto path_ref = make_uniq( - "path", column_ref->column_names[0]); - vector> path_children; - path_children.push_back(std::move(path_ref)); - auto path_len = - make_uniq("len", std::move(path_children)); - auto constant_two = make_uniq(Value::INTEGER(2)); - vector> div_children; - div_children.push_back(std::move(path_len)); - div_children.push_back(std::move(constant_two)); - auto div_expression = - make_uniq("//", std::move(div_children)); - div_expression->alias = - "path_length_" + column_ref->column_names[0]; - final_column_list.emplace_back(std::move(div_expression)); - } - } else { - final_column_list.push_back(std::move(expression)); - } - - continue; - } - - 
final_column_list.push_back(std::move(expression)); - } - - select_node->where_clause = CreateWhereClause(conditions); - select_node->select_list = std::move(final_column_list); - - auto subquery = make_uniq(); - subquery->node = std::move(select_node); - - auto result = make_uniq(std::move(subquery), ref->alias); - - return std::move(result); - } +shared_ptr +PGQMatchFunction::FindGraphTable(const string &label, + CreatePropertyGraphInfo &pg_table) { + const auto graph_table_entry = pg_table.label_map.find(label); + if (graph_table_entry == pg_table.label_map.end()) { + throw BinderException("The label %s is not registered in property graph %s", + label, pg_table.property_graph_name); + } + + return graph_table_entry->second; +} + +void PGQMatchFunction::CheckInheritance( + const shared_ptr &tableref, PathElement *element, + vector> &conditions) { + if (tableref->main_label == element->label) { + return; + } + auto constant_expression_two = + make_uniq(Value::INTEGER((int32_t)2)); + const auto itr = std::find(tableref->sub_labels.begin(), + tableref->sub_labels.end(), element->label); + + const auto idx_of_element = std::distance(tableref->sub_labels.begin(), itr); + auto constant_expression_idx_label = make_uniq( + Value::INTEGER(static_cast(idx_of_element))); + + vector> power_of_children; + power_of_children.push_back(std::move(constant_expression_two)); + power_of_children.push_back(std::move(constant_expression_idx_label)); + auto power_of_term = + make_uniq("power", std::move(power_of_children)); + auto bigint_cast = + make_uniq(LogicalType::BIGINT, std::move(power_of_term)); + auto subcategory_colref = make_uniq( + tableref->discriminator, element->variable_binding); + vector> and_children; + and_children.push_back(std::move(subcategory_colref)); + and_children.push_back(std::move(bigint_cast)); + + auto and_expression = + make_uniq("&", std::move(and_children)); + + auto constant_expression_idx_label_comparison = make_uniq( + 
Value::INTEGER(static_cast(idx_of_element + 1))); + + auto subset_compare = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(and_expression), + std::move(constant_expression_idx_label_comparison)); + conditions.push_back(std::move(subset_compare)); +} + +void PGQMatchFunction::CheckEdgeTableConstraints( + const string &src_reference, const string &dst_reference, + const shared_ptr &edge_table) { + if (src_reference != edge_table->source_reference) { + throw BinderException("Label %s is not registered as a source reference " + "for edge pattern of table %s", + src_reference, edge_table->table_name); + } + if (dst_reference != edge_table->destination_reference) { + throw BinderException("Label %s is not registered as a destination " + "reference for edge pattern of table %s", + dst_reference, edge_table->table_name); + } +} + +unique_ptr PGQMatchFunction::CreateMatchJoinExpression( + vector vertex_keys, vector edge_keys, + const string &vertex_alias, const string &edge_alias) { + vector> conditions; + + if (vertex_keys.size() != edge_keys.size()) { + throw BinderException("Vertex columns and edge columns size mismatch"); + } + for (idx_t i = 0; i < vertex_keys.size(); i++) { + auto vertex_colref = + make_uniq(vertex_keys[i], vertex_alias); + auto edge_colref = make_uniq(edge_keys[i], edge_alias); + conditions.push_back(make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(vertex_colref), + std::move(edge_colref))); + } + unique_ptr where_clause; + + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + + return where_clause; +} + +PathElement *PGQMatchFunction::GetPathElement( + const unique_ptr &path_reference) { + if (path_reference->path_reference_type == + PGQPathReferenceType::PATH_ELEMENT) { + return reinterpret_cast(path_reference.get()); + } + if
(path_reference->path_reference_type == PGQPathReferenceType::SUBPATH) { + return nullptr; + } + throw InternalException("Unknown path reference type detected"); +} + +unique_ptr PGQMatchFunction::GetCountTable( + const shared_ptr &edge_table, + const string &prev_binding) { + // SELECT count(s.id) FROM src s + auto select_count = make_uniq(); + auto select_inner = make_uniq(); + auto ref = make_uniq(); + + ref->table_name = edge_table->source_reference; + ref->alias = prev_binding; + select_inner->from_table = std::move(ref); + vector> children; + children.push_back( + make_uniq(edge_table->source_pk[0], prev_binding)); + + auto count_function = + make_uniq("count", std::move(children)); + select_inner->select_list.push_back(std::move(count_function)); + select_count->node = std::move(select_inner); + auto result = make_uniq(); + result->subquery = std::move(select_count); + result->subquery_type = SubqueryType::SCALAR; + return result; +} + +unique_ptr +PGQMatchFunction::GetJoinRef(const shared_ptr &edge_table, + const string &edge_binding, + const string &prev_binding, + const string &next_binding) { + auto first_join_ref = make_uniq(JoinRefType::REGULAR); + first_join_ref->type = JoinType::INNER; + + auto second_join_ref = make_uniq(JoinRefType::REGULAR); + second_join_ref->type = JoinType::INNER; + + auto edge_base_ref = make_uniq(); + edge_base_ref->table_name = edge_table->table_name; + edge_base_ref->alias = edge_binding; + auto src_base_ref = make_uniq(); + src_base_ref->table_name = edge_table->source_reference; + src_base_ref->alias = prev_binding; + second_join_ref->left = std::move(edge_base_ref); + second_join_ref->right = std::move(src_base_ref); + auto t_from_ref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto src_cid_ref = + make_uniq(edge_table->source_pk[0], prev_binding); + second_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_from_ref), + std::move(src_cid_ref)); + auto dst_base_ref = make_uniq(); + 
dst_base_ref->table_name = edge_table->destination_reference; + dst_base_ref->alias = next_binding; + first_join_ref->left = std::move(second_join_ref); + first_join_ref->right = std::move(dst_base_ref); + + auto t_to_ref = make_uniq(edge_table->destination_fk[0], + edge_binding); + auto dst_cid_ref = make_uniq( + edge_table->destination_pk[0], next_binding); + first_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(t_to_ref), + std::move(dst_cid_ref)); + return first_join_ref; +} + +unique_ptr PGQMatchFunction::CreateCountCTESubquery() { + //! BEGIN OF (SELECT count(cte1.temp) as temp * 0 from cte1) __x + + auto temp_cte_select_node = make_uniq(); + + auto cte_table_ref = make_uniq(); + + cte_table_ref->table_name = "cte1"; + temp_cte_select_node->from_table = std::move(cte_table_ref); + vector> children; + children.push_back(make_uniq("temp", "cte1")); + + auto count_function = + make_uniq("count", std::move(children)); + + auto zero = make_uniq(Value::INTEGER((int32_t)0)); + + vector> multiply_children; + + multiply_children.push_back(std::move(zero)); + multiply_children.push_back(std::move(count_function)); + auto multiply_function = + make_uniq("multiply", std::move(multiply_children)); + multiply_function->alias = "temp"; + temp_cte_select_node->select_list.push_back(std::move(multiply_function)); + auto temp_cte_select_statement = make_uniq(); + temp_cte_select_statement->node = std::move(temp_cte_select_node); + + auto temp_cte_select_subquery = + make_uniq(std::move(temp_cte_select_statement), "__x"); + //! 
END OF (SELECT count(cte1.temp) * 0 as temp from cte1) __x + return temp_cte_select_subquery; +} + +unique_ptr +PGQMatchFunction::CreateCSRCTE(const shared_ptr &edge_table, + const string &prev_binding, + const string &edge_binding, + const string &next_binding) { + auto csr_edge_id_constant = make_uniq(Value::INTEGER(0)); + auto count_create_edge_select = GetCountTable(edge_table, prev_binding); + + auto cast_subquery_expr = make_uniq(); + auto cast_select_node = make_uniq(); + + vector> csr_vertex_children; + csr_vertex_children.push_back( + make_uniq(Value::INTEGER(0))); + + auto count_create_vertex_expr = GetCountTable(edge_table, prev_binding); + + csr_vertex_children.push_back(std::move(count_create_vertex_expr)); + + csr_vertex_children.push_back( + make_uniq("dense_id", "sub")); + csr_vertex_children.push_back(make_uniq("cnt", "sub")); + + auto create_vertex_function = make_uniq( + "create_csr_vertex", std::move(csr_vertex_children)); + vector> sum_children; + sum_children.push_back(std::move(create_vertex_function)); + auto sum_function = + make_uniq("sum", std::move(sum_children)); + + auto inner_select_statement = make_uniq(); + auto inner_select_node = make_uniq(); + + auto source_rowid_colref = + make_uniq("rowid", prev_binding); + source_rowid_colref->alias = "dense_id"; + + auto count_create_inner_expr = make_uniq(); + count_create_inner_expr->subquery_type = SubqueryType::SCALAR; + auto edge_src_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + vector> inner_count_children; + inner_count_children.push_back(std::move(edge_src_colref)); + auto inner_count_function = + make_uniq("count", std::move(inner_count_children)); + inner_count_function->alias = "cnt"; + + inner_select_node->select_list.push_back(std::move(source_rowid_colref)); + inner_select_node->select_list.push_back(std::move(inner_count_function)); + auto source_rowid_colref_1 = + make_uniq("rowid", prev_binding); + expression_map_t grouping_expression_map; + 
inner_select_node->groups.group_expressions.push_back( + std::move(source_rowid_colref_1)); + GroupingSet grouping_set = {0}; + inner_select_node->groups.grouping_sets.push_back(grouping_set); + + auto inner_join_ref = make_uniq(JoinRefType::REGULAR); + inner_join_ref->type = JoinType::LEFT; + auto left_base_ref = make_uniq(); + left_base_ref->table_name = edge_table->source_reference; + left_base_ref->alias = prev_binding; + auto right_base_ref = make_uniq(); + right_base_ref->table_name = edge_table->table_name; + right_base_ref->alias = edge_binding; + inner_join_ref->left = std::move(left_base_ref); + inner_join_ref->right = std::move(right_base_ref); + + auto edge_join_colref = + make_uniq(edge_table->source_fk[0], edge_binding); + auto vertex_join_colref = + make_uniq(edge_table->source_pk[0], prev_binding); + + inner_join_ref->condition = make_uniq( + ExpressionType::COMPARE_EQUAL, std::move(edge_join_colref), + std::move(vertex_join_colref)); + inner_select_node->from_table = std::move(inner_join_ref); + inner_select_statement->node = std::move(inner_select_node); + + auto inner_from_subquery = + make_uniq(std::move(inner_select_statement), "sub"); + + cast_select_node->from_table = std::move(inner_from_subquery); + + cast_select_node->select_list.push_back(std::move(sum_function)); + auto cast_select_stmt = make_uniq(); + cast_select_stmt->node = std::move(cast_select_node); + cast_subquery_expr->subquery = std::move(cast_select_stmt); + cast_subquery_expr->subquery_type = SubqueryType::SCALAR; + + auto src_rowid_colref = make_uniq("rowid", prev_binding); + auto dst_rowid_colref = make_uniq("rowid", next_binding); + auto edge_rowid_colref = + make_uniq("rowid", edge_binding); + + auto cast_expression = make_uniq( + LogicalType::BIGINT, std::move(cast_subquery_expr)); + + vector> csr_edge_children; + csr_edge_children.push_back(std::move(csr_edge_id_constant)); + csr_edge_children.push_back(std::move(count_create_edge_select)); + 
csr_edge_children.push_back(std::move(cast_expression)); + csr_edge_children.push_back(std::move(src_rowid_colref)); + csr_edge_children.push_back(std::move(dst_rowid_colref)); + csr_edge_children.push_back(std::move(edge_rowid_colref)); + + auto outer_select_node = make_uniq(); + + auto create_csr_edge_function = make_uniq( + "create_csr_edge", std::move(csr_edge_children)); + create_csr_edge_function->alias = "temp"; + + outer_select_node->select_list.push_back(std::move(create_csr_edge_function)); + outer_select_node->from_table = + GetJoinRef(edge_table, edge_binding, prev_binding, next_binding); + auto outer_select_statement = make_uniq(); + + outer_select_statement->node = std::move(outer_select_node); + auto info = make_uniq(); + info->query = std::move(outer_select_statement); + return info; +} + +void PGQMatchFunction::EdgeTypeAny( + const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + // (a) src.key = edge.src + auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, prev_binding, edge_binding); + // (b) dst.key = edge.dst + auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + next_binding, edge_binding); + // (a) AND (b) + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); + // (c) src.key = edge.dst + auto src_right_expr = CreateMatchJoinExpression(edge_table->source_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + // (d) dst.key = edge.src + auto dst_right_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->source_fk, + next_binding, edge_binding); + // (c) AND (d) + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + // ((a) AND (b)) OR ((c) AND (d)) + auto combined_expr = 
make_uniq( + ExpressionType::CONJUNCTION_OR, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); +} + +void PGQMatchFunction::EdgeTypeLeft( + const shared_ptr &edge_table, + const string &next_table_name, const string &prev_table_name, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(next_table_name, prev_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + next_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding)); +} + +void PGQMatchFunction::EdgeTypeRight( + const shared_ptr &edge_table, + const string &next_table_name, const string &prev_table_name, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions) { + CheckEdgeTableConstraints(prev_table_name, next_table_name, edge_table); + conditions.push_back(CreateMatchJoinExpression(edge_table->source_pk, + edge_table->source_fk, + prev_binding, edge_binding)); + conditions.push_back(CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + next_binding, edge_binding)); +} + +void PGQMatchFunction::EdgeTypeLeftRight( + const shared_ptr &edge_table, + const string &edge_binding, const string &prev_binding, + const string &next_binding, + vector> &conditions, + unordered_map &alias_map, int32_t &extra_alias_counter) { + auto src_left_expr = CreateMatchJoinExpression( + edge_table->source_pk, edge_table->source_fk, next_binding, edge_binding); + auto dst_left_expr = CreateMatchJoinExpression(edge_table->destination_pk, + edge_table->destination_fk, + prev_binding, edge_binding); + + auto combined_left_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_left_expr), + std::move(dst_left_expr)); 
+ + const auto additional_edge_alias = + edge_binding + std::to_string(extra_alias_counter); + extra_alias_counter++; + + alias_map[additional_edge_alias] = edge_table->table_name; + + auto src_right_expr = + CreateMatchJoinExpression(edge_table->source_pk, edge_table->source_fk, + prev_binding, additional_edge_alias); + auto dst_right_expr = CreateMatchJoinExpression( + edge_table->destination_pk, edge_table->destination_fk, next_binding, + additional_edge_alias); + auto combined_right_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(src_right_expr), + std::move(dst_right_expr)); + + auto combined_expr = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(combined_left_expr), + std::move(combined_right_expr)); + conditions.push_back(std::move(combined_expr)); +} + +PathElement *PGQMatchFunction::HandleNestedSubPath( + unique_ptr &path_reference, + vector> &conditions, idx_t element_idx) { + auto subpath = reinterpret_cast(path_reference.get()); + return GetPathElement(subpath->path_list[element_idx]); +} + +unique_ptr +CreateWhereClause(vector> &conditions) { + unique_ptr where_clause; + for (auto &condition : conditions) { + if (where_clause) { + where_clause = make_uniq( + ExpressionType::CONJUNCTION_AND, std::move(where_clause), + std::move(condition)); + } else { + where_clause = std::move(condition); + } + } + return where_clause; +} + +unique_ptr PGQMatchFunction::CreatePathFindingFunction( + vector> &path_list, + CreatePropertyGraphInfo &pg_table) { + // This method will return a SubqueryRef of a list of rowids + // For every vertex and edge element, we add the rowid to the list using + // list_append, or list_prepend The difficulty is that there may be a + // (un)bounded path pattern at some point in the query This is computed using + // the shortestpath() UDF and returns a list. This list will be part of the + // full list of element rowids, using list_concat. 
For now we will only + // support returning rowids + unique_ptr final_list; + + auto previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + // We hit a vertex element with a WHERE, but we only care about the rowid + // here + auto previous_vertex_subpath = + reinterpret_cast(path_list[0].get()); + previous_vertex_element = + GetPathElement(previous_vertex_subpath->path_list[0]); + } + + for (idx_t idx_i = 1; idx_i < path_list.size(); idx_i = idx_i + 2) { + auto next_vertex_element = GetPathElement(path_list[idx_i + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_list[idx_i + 1].get()); + next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + + auto edge_element = GetPathElement(path_list[idx_i]); + if (!edge_element) { + auto edge_subpath = reinterpret_cast(path_list[idx_i].get()); + if (edge_subpath->upper > 1) { + // (un)bounded shortest path + // Add the shortest path UDF + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + auto src_row_id = make_uniq( + "rowid", previous_vertex_element->variable_binding); + auto dst_row_id = make_uniq( + "rowid", next_vertex_element->variable_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back(std::move(GetCountTable( + edge_table, previous_vertex_element->variable_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto shortest_path_function = make_uniq( + "shortestpath", std::move(pathfinding_children)); + + if (!final_list) { + final_list = std::move(shortest_path_function); + } else { + auto pop_front_shortest_path_children = + vector>(); + pop_front_shortest_path_children.push_back( + std::move(shortest_path_function)); + auto pop_front = make_uniq( 
+ "array_pop_front", std::move(pop_front_shortest_path_children)); + + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(pop_front)); + final_list = make_uniq( + "list_concat", std::move(final_list_children)); + } + // Set next vertex to be previous + previous_vertex_element = next_vertex_element; + continue; + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + } + auto previous_rowid = make_uniq( + "rowid", previous_vertex_element->variable_binding); + auto edge_rowid = + make_uniq("rowid", edge_element->variable_binding); + auto next_rowid = make_uniq( + "rowid", next_vertex_element->variable_binding); + auto starting_list_children = vector>(); + + if (!final_list) { + starting_list_children.push_back(std::move(previous_rowid)); + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + final_list = make_uniq( + "list_value", std::move(starting_list_children)); + } else { + starting_list_children.push_back(std::move(edge_rowid)); + starting_list_children.push_back(std::move(next_rowid)); + auto next_elements_list = make_uniq( + "list_value", std::move(starting_list_children)); + auto final_list_children = vector>(); + final_list_children.push_back(std::move(final_list)); + final_list_children.push_back(std::move(next_elements_list)); + final_list = make_uniq( + "list_concat", std::move(final_list_children)); + } + previous_vertex_element = next_vertex_element; + } + + return final_list; +} + +void PGQMatchFunction::AddEdgeJoins( + const unique_ptr &select_node, + const shared_ptr &edge_table, + const shared_ptr &previous_vertex_table, + const shared_ptr &next_vertex_table, + PGQMatchType edge_type, const string &edge_binding, + const string &prev_binding, const string &next_binding, + vector> &conditions, + unordered_map &alias_map, int32_t &extra_alias_counter) { + switch (edge_type) { + case 
PGQMatchType::MATCH_EDGE_ANY: { + select_node->modifiers.push_back(make_uniq()); + EdgeTypeAny(edge_table, edge_binding, prev_binding, next_binding, + conditions); + break; + } + case PGQMatchType::MATCH_EDGE_LEFT: + EdgeTypeLeft(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, edge_binding, prev_binding, + next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_RIGHT: + EdgeTypeRight(edge_table, next_vertex_table->table_name, + previous_vertex_table->table_name, edge_binding, prev_binding, + next_binding, conditions); + break; + case PGQMatchType::MATCH_EDGE_LEFT_RIGHT: { + EdgeTypeLeftRight(edge_table, edge_binding, prev_binding, next_binding, + conditions, alias_map, extra_alias_counter); + break; + } + default: + throw InternalException("Unknown match type found"); + } +} + +void PGQMatchFunction::AddPathFinding( + const unique_ptr &select_node, + unique_ptr &from_clause, + vector> &conditions, + const string &prev_binding, const string &edge_binding, + const string &next_binding, + const shared_ptr &edge_table, const SubPath *subpath) { + //! START + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + select_node->cte_map.map["cte1"] = + CreateCSRCTE(edge_table, prev_binding, edge_binding, next_binding); + + auto temp_cte_select_subquery = CreateCountCTESubquery(); + + if (from_clause) { + // create a cross join since there is already something in the + // from clause + auto from_join = make_uniq(JoinRefType::CROSS); + from_join->left = std::move(from_clause); + from_join->right = std::move(temp_cte_select_subquery); + from_clause = std::move(from_join); + } else { + from_clause = std::move(temp_cte_select_subquery); + } + //! END + //! FROM (SELECT count(cte1.temp) * 0 as temp from cte1) __x + + //! START + //! WHERE __x.temp + iterativelength(, (SELECT count(c.id) + //! 
from dst c, a.rowid, b.rowid) between lower and upper + + auto src_row_id = make_uniq("rowid", prev_binding); + auto dst_row_id = make_uniq("rowid", next_binding); + auto csr_id = make_uniq(Value::INTEGER(0)); + + vector> pathfinding_children; + pathfinding_children.push_back(std::move(csr_id)); + pathfinding_children.push_back( + std::move(GetCountTable(edge_table, prev_binding))); + pathfinding_children.push_back(std::move(src_row_id)); + pathfinding_children.push_back(std::move(dst_row_id)); + + auto reachability_function = make_uniq( + "iterativelength", std::move(pathfinding_children)); + + auto cte_col_ref = make_uniq("temp", "__x"); + + vector> addition_children; + addition_children.push_back(std::move(cte_col_ref)); + addition_children.push_back(std::move(reachability_function)); + + auto addition_function = + make_uniq("add", std::move(addition_children)); + auto lower_limit = make_uniq( + Value::INTEGER(static_cast(subpath->lower))); + auto upper_limit = make_uniq( + Value::INTEGER(static_cast(subpath->upper))); + auto between_expression = make_uniq( + std::move(addition_function), std::move(lower_limit), + std::move(upper_limit)); + conditions.push_back(std::move(between_expression)); + + //! END + //! WHERE __x.temp + iterativelength(, (SELECT count(s.id) + //! 
from src s, a.rowid, b.rowid) between lower and upper +} + +void PGQMatchFunction::CheckNamedSubpath( + SubPath &subpath, vector> &column_list, + CreatePropertyGraphInfo &pg_table) { + for (idx_t idx_i = 0; idx_i < column_list.size(); idx_i++) { + FunctionExpression *parsed_ref = + dynamic_cast(column_list[idx_i].get()); + if (parsed_ref == nullptr) { + continue; + } + auto column_ref = + dynamic_cast(parsed_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + // Trying to check parsed_ref->alias directly leads to a segfault + string column_alias = parsed_ref->alias; + + if (column_ref->column_names[0] != subpath.path_variable) { + continue; + } + if (parsed_ref->function_name == "element_id") { + // Check subpath name matches the column referenced in the function --> + // element_id(named_subpath) + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + + if (column_alias.empty()) { + shortest_path_function->alias = + "element_id(" + subpath.path_variable + ")"; + } else { + shortest_path_function->alias = column_alias; + } + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, + std::move(shortest_path_function)); + } else if (parsed_ref->function_name == "path_length") { + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + auto path_len_children = vector>(); + path_len_children.push_back(std::move(shortest_path_function)); + auto path_len = + make_uniq("len", std::move(path_len_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto path_length_function = + make_uniq("//", std::move(div_children)); + path_length_function->alias = + column_alias.empty() ? 
"path_length(" + subpath.path_variable + ")" + : column_alias; + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, + std::move(path_length_function)); + } else if (parsed_ref->function_name == "vertices" || + parsed_ref->function_name == "edges") { + auto shortest_path_function = + CreatePathFindingFunction(subpath.path_list, pg_table); + auto list_slice_children = vector>(); + list_slice_children.push_back(std::move(shortest_path_function)); + + if (parsed_ref->function_name == "vertices") { + list_slice_children.push_back( + make_uniq(Value::INTEGER(1))); + } else { + list_slice_children.push_back( + make_uniq(Value::INTEGER(2))); + } + auto slice_end = make_uniq(Value::INTEGER(-1)); + auto slice_step = make_uniq(Value::INTEGER(2)); + + list_slice_children.push_back(std::move(slice_end)); + list_slice_children.push_back(std::move(slice_step)); + auto list_slice = make_uniq( + "list_slice", std::move(list_slice_children)); + if (parsed_ref->function_name == "vertices") { + list_slice->alias = column_alias.empty() + ? "vertices(" + subpath.path_variable + ")" + : column_alias; + } else { + list_slice->alias = column_alias.empty() + ? 
"edges(" + subpath.path_variable + ")" + : column_alias; + } + column_list.erase(column_list.begin() + idx_i); + column_list.insert(column_list.begin() + idx_i, std::move(list_slice)); + } + } +} + +void PGQMatchFunction::ProcessPathList( + vector> &path_list, + vector> &conditions, + unique_ptr &from_clause, unique_ptr &select_node, + unordered_map &alias_map, CreatePropertyGraphInfo &pg_table, + int32_t &extra_alias_counter, + vector> &column_list) { + PathElement *previous_vertex_element = GetPathElement(path_list[0]); + if (!previous_vertex_element) { + const auto previous_vertex_subpath = + reinterpret_cast(path_list[0].get()); + CheckNamedSubpath(*previous_vertex_subpath, column_list, pg_table); + if (previous_vertex_subpath->where_clause) { + conditions.push_back(std::move(previous_vertex_subpath->where_clause)); + } + if (previous_vertex_subpath->path_list.size() == 1) { + previous_vertex_element = + GetPathElement(previous_vertex_subpath->path_list[0]); + } else { + // Add the shortest path if the name is found in the column_list + ProcessPathList(previous_vertex_subpath->path_list, conditions, + from_clause, select_node, alias_map, pg_table, + extra_alias_counter, column_list); + return; + } + } + auto previous_vertex_table = + FindGraphTable(previous_vertex_element->label, pg_table); + CheckInheritance(previous_vertex_table, previous_vertex_element, conditions); + alias_map[previous_vertex_element->variable_binding] = + previous_vertex_table->table_name; + + for (idx_t idx_j = 1; idx_j < path_list.size(); idx_j = idx_j + 2) { + PathElement *next_vertex_element = GetPathElement(path_list[idx_j + 1]); + if (!next_vertex_element) { + auto next_vertex_subpath = + reinterpret_cast(path_list[idx_j + 1].get()); + if (next_vertex_subpath->path_list.size() > 1) { + throw NotImplementedException( + "Recursive patterns are not yet supported."); + } + if (next_vertex_subpath->where_clause) { + conditions.push_back(std::move(next_vertex_subpath->where_clause)); + } + 
next_vertex_element = GetPathElement(next_vertex_subpath->path_list[0]); + } + if (next_vertex_element->match_type != PGQMatchType::MATCH_VERTEX || + previous_vertex_element->match_type != PGQMatchType::MATCH_VERTEX) { + throw BinderException("Vertex and edge patterns must be alternated."); + } + auto next_vertex_table = + FindGraphTable(next_vertex_element->label, pg_table); + CheckInheritance(next_vertex_table, next_vertex_element, conditions); + alias_map[next_vertex_element->variable_binding] = + next_vertex_table->table_name; + + PathElement *edge_element = GetPathElement(path_list[idx_j]); + if (!edge_element) { + // We are dealing with a subpath + auto edge_subpath = reinterpret_cast(path_list[idx_j].get()); + if (edge_subpath->where_clause) { + conditions.push_back(std::move(edge_subpath->where_clause)); + } + if (edge_subpath->path_list.size() > 1) { + throw NotImplementedException( + "Subpath on an edge is not yet supported."); + } + edge_element = GetPathElement(edge_subpath->path_list[0]); + auto edge_table = FindGraphTable(edge_element->label, pg_table); + + if (edge_subpath->upper > 1) { + // Add the path-finding + AddPathFinding(select_node, from_clause, conditions, + previous_vertex_element->variable_binding, + edge_element->variable_binding, + next_vertex_element->variable_binding, edge_table, + edge_subpath); + } else { + alias_map[edge_element->variable_binding] = + edge_table->source_reference; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, + previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, + alias_map, extra_alias_counter); + } + } else { + // The edge element is a path element without WHERE or path-finding. 
+ auto edge_table = FindGraphTable(edge_element->label, pg_table); + CheckInheritance(edge_table, edge_element, conditions); + // check aliases + alias_map[edge_element->variable_binding] = edge_table->table_name; + AddEdgeJoins(select_node, edge_table, previous_vertex_table, + next_vertex_table, edge_element->match_type, + edge_element->variable_binding, + previous_vertex_element->variable_binding, + next_vertex_element->variable_binding, conditions, alias_map, + extra_alias_counter); + // Check the edge type + // If (a)-[b]->(c) -> b.src = a.id AND b.dst = c.id + // If (a)<-[b]-(c) -> b.dst = a.id AND b.src = c.id + // If (a)-[b]-(c) -> (b.src = a.id AND b.dst = c.id) OR + // (b.dst = a.id AND b.src + // = c.id) If (a)<-[b]->(c) -> (b.src = a.id AND b.dst = c.id) AND + // (b.dst = a.id AND b.src + //= c.id) + } + previous_vertex_element = next_vertex_element; + previous_vertex_table = next_vertex_table; + } +} + +unique_ptr +PGQMatchFunction::MatchBindReplace(ClientContext &context, + TableFunctionBindInput &) { + auto duckpgq_state_entry = context.registered_state.find("duckpgq"); + auto duckpgq_state = + dynamic_cast(duckpgq_state_entry->second.get()); + + auto ref = dynamic_cast( + duckpgq_state->transform_expression.get()); + auto pg_table = duckpgq_state->GetPropertyGraph(ref->pg_name); + + auto data = make_uniq(); + + vector> conditions; + + auto select_node = make_uniq(); + unordered_map alias_map; + unique_ptr from_clause; + + int32_t extra_alias_counter = 0; + for (idx_t idx_i = 0; idx_i < ref->path_patterns.size(); idx_i++) { + auto &path_pattern = ref->path_patterns[idx_i]; + // Check if the element is PathElement or a Subpath with potentially many + // items + ProcessPathList(path_pattern->path_elements, conditions, from_clause, + select_node, alias_map, *pg_table, extra_alias_counter, + ref->column_list); + } + + // Go through all aliases encountered + for (auto &table_alias_entry : alias_map) { + auto table_ref = make_uniq(); + table_ref->table_name 
= table_alias_entry.second; + table_ref->alias = table_alias_entry.first; + + if (from_clause) { + auto new_root = make_uniq(JoinRefType::CROSS); + new_root->left = std::move(from_clause); + new_root->right = std::move(table_ref); + from_clause = std::move(new_root); + } else { + from_clause = std::move(table_ref); + } + } + + select_node->from_table = std::move(from_clause); + + if (ref->where_clause) { + conditions.push_back(std::move(ref->where_clause)); + } + std::vector> final_column_list; + + for (auto &expression : ref->column_list) { + unordered_set named_subpaths; + auto column_ref = dynamic_cast(expression.get()); + if (column_ref != nullptr) { + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + final_column_list.emplace_back(make_uniq( + "path", column_ref->column_names[0])); + } else { + final_column_list.push_back(std::move(expression)); + } + continue; + } + auto function_ref = dynamic_cast(expression.get()); + if (function_ref != nullptr) { + if (function_ref->function_name == "path_length") { + column_ref = dynamic_cast( + function_ref->children[0].get()); + if (column_ref == nullptr) { + continue; + } + if (named_subpaths.count(column_ref->column_names[0]) && + column_ref->column_names.size() == 1) { + auto path_ref = make_uniq( + "path", column_ref->column_names[0]); + vector> path_children; + path_children.push_back(std::move(path_ref)); + auto path_len = + make_uniq("len", std::move(path_children)); + auto constant_two = make_uniq(Value::INTEGER(2)); + vector> div_children; + div_children.push_back(std::move(path_len)); + div_children.push_back(std::move(constant_two)); + auto div_expression = + make_uniq("//", std::move(div_children)); + div_expression->alias = "path_length_" + column_ref->column_names[0]; + final_column_list.emplace_back(std::move(div_expression)); + } + } else { + final_column_list.push_back(std::move(expression)); + } + + continue; + } + + 
final_column_list.push_back(std::move(expression)); + } + + select_node->where_clause = CreateWhereClause(conditions); + select_node->select_list = std::move(final_column_list); + + auto subquery = make_uniq(); + subquery->node = std::move(select_node); + + auto result = make_uniq(std::move(subquery), ref->alias); + + return std::move(result); +} } // namespace duckdb diff --git a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp index d9ef7a7f..ba646aac 100644 --- a/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp +++ b/duckpgq/src/duckpgq/functions/tablefunctions/pgq_scan.cpp @@ -39,8 +39,8 @@ static void ScanCSREFunction(ClientContext &context, TableFunctionInput &data_p, FlatVector::SetData(output.data[0], (data_ptr_t)csr->e.data()); } -static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_p, - DataChunk &output) { +static void ScanCSRPtrFunction(ClientContext &context, + TableFunctionInput &data_p, DataChunk &output) { bool &gstate = ((CSRScanState &)*data_p.global_state).finished; if (gstate) { @@ -74,7 +74,7 @@ static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_ // the third element is the address of the weight array // the fifth element is the type of the weight array // 0 if the weights are integres, 1 if they are doubles, and 2 for unweighted - if(csr->w.size()) { + if (csr->w.size()) { result_data[2] = (uint64_t)(&(csr->w)); result_data[4] = (uint64_t)(0); } else if (csr->w_double.size()) { @@ -84,7 +84,8 @@ static void ScanCSRPtrFunction(ClientContext &context, TableFunctionInput &data_ result_data[2] = (uint64_t)(0); result_data[4] = (uint64_t)(2); } - // we also need the number of elements in the vertex array, since its C-array not a vector. + // we also need the number of elements in the vertex array, since its C-array + // not a vector. 
result_data[3] = (uint64_t)(csr->vsize); } diff --git a/duckpgq/src/duckpgq_extension.cpp b/duckpgq/src/duckpgq_extension.cpp index ead1e2e6..1cadbe71 100644 --- a/duckpgq/src/duckpgq_extension.cpp +++ b/duckpgq/src/duckpgq_extension.cpp @@ -90,17 +90,17 @@ BoundStatement duckpgq_bind(ClientContext &context, Binder &binder, SQLStatement &statement) { auto lookup = context.registered_state.find("duckpgq"); if (lookup == context.registered_state.end()) { - throw BinderException("Registered state not found"); - } - - auto duckpgq_state = (DuckPGQState *)lookup->second.get(); - auto duckpgq_binder = Binder::CreateBinder(context); - auto duckpgq_parse_data = - dynamic_cast(duckpgq_state->parse_data.get()); - if (duckpgq_parse_data) { - return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); - } - throw BinderException("Unable to find DuckPGQ Parse Data"); + throw BinderException("Registered state not found"); + } + + auto duckpgq_state = (DuckPGQState *)lookup->second.get(); + auto duckpgq_binder = Binder::CreateBinder(context); + auto duckpgq_parse_data = + dynamic_cast(duckpgq_state->parse_data.get()); + if (duckpgq_parse_data) { + return duckpgq_binder->Bind(*(duckpgq_parse_data->statement)); + } + throw BinderException("Unable to find DuckPGQ Parse Data"); } ParserExtensionPlanResult @@ -136,13 +136,15 @@ duckpgq_plan(ParserExtensionInfo *, ClientContext &context, function->children.pop_back(); } throw Exception("use duckpgq_bind instead"); - } if (statement->type == StatementType::CREATE_STATEMENT) { + } + if (statement->type == StatementType::CREATE_STATEMENT) { ParserExtensionPlanResult result; result.function = CreatePropertyGraphFunction(); result.requires_valid_transaction = true; result.return_type = StatementReturnType::QUERY_RESULT; return result; - } if (statement->type == StatementType::DROP_STATEMENT) { + } + if (statement->type == StatementType::DROP_STATEMENT) { ParserExtensionPlanResult result; result.function = DropPropertyGraphFunction(); 
result.requires_valid_transaction = true;