From b03f71c728f5ad75d6b5269288ae23dca80d6f51 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 28 Nov 2023 13:40:01 -0500 Subject: [PATCH 1/3] fix(search): correctly handle nested Postgres JSON querying --- bento_lib/package.cfg | 2 +- bento_lib/search/postgres.py | 48 ++++++++++++++++++++++++++---------- tests/test_search.py | 48 +++++++++++++++++++++++++++++++----- 3 files changed, 78 insertions(+), 20 deletions(-) diff --git a/bento_lib/package.cfg b/bento_lib/package.cfg index 393286c..7699eb1 100644 --- a/bento_lib/package.cfg +++ b/bento_lib/package.cfg @@ -1,5 +1,5 @@ [package] name = bento_lib -version = 10.1.0 +version = 10.1.1 authors = David Lougheed, Paul Pillot author_emails = david.lougheed@mail.mcgill.ca, paul.pillot@computationalgenomics.ca diff --git a/bento_lib/search/postgres.py b/bento_lib/search/postgres.py index ae8c1a7..2f79434 100644 --- a/bento_lib/search/postgres.py +++ b/bento_lib/search/postgres.py @@ -20,6 +20,7 @@ QUERY_ROOT = q.Literal("$root") SQL_ROOT = sql.Identifier("_root") +SQL_NOTHING = sql.SQL("") # TODO: Python 3.7: Data Class @@ -41,6 +42,7 @@ def __init__( relations: OptionalComposablePair, aliases: OptionalComposablePair, current_alias_str: Optional[str], + current_alias_sql_schema: Optional[sql.SQL], key_link: Optional[Tuple[str, str]], field_alias: Optional[str], search_properties: dict, @@ -49,6 +51,7 @@ def __init__( self.relations: OptionalComposablePair = relations self.aliases: OptionalComposablePair = aliases self.current_alias_str: Optional[str] = current_alias_str + self.current_alias_sql_schema: Optional[sql.SQL] = current_alias_sql_schema self.key_link: Optional[Tuple[str, str]] = key_link self.field_alias: Optional[str] = field_alias self.search_properties: dict = search_properties @@ -80,7 +83,10 @@ def json_schema_to_postgres_type(schema: JSONSchema) -> str: return "TEXT" # TODO -def json_schema_to_postgres_schema(name: str, schema: JSONSchema) -> Tuple[Optional[sql.Composable], Optional[str]]: +def json_schema_to_postgres_schema( + name: str, + schema: JSONSchema, +) -> Tuple[Optional[sql.Composable], Optional[str], Optional[sql.Composable]]: """ Maps a JSON object schema to a Postgres schema for on-the-fly mapping. :param name: the name to give the fake table. @@ -88,15 +94,17 @@ def json_schema_to_postgres_schema(name: str, schema: JSONSchema) -> Tuple[Optio """ if schema["type"] != "object": - return None, None + return None, None, None return ( - sql.SQL("{}({})").format( - sql.Identifier(name), - sql.SQL(", ").join(sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s))) - for p, s in schema["properties"].items())), - "{}({})".format(name, ", ".join("{} {}".format(p, json_schema_to_postgres_type(s)) - for p, s in schema["properties"].items())) + sql.Identifier(name), + name, + sql.SQL("({})").format( + sql.SQL(", ").join( + sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s))) + for p, s in schema["properties"].items() + ) + ), ) @@ -145,6 +153,7 @@ def collect_resolve_join_tables( new_aliased_resolve_path = re.sub(r"[$\[\]]+", "", f"{aliased_resolve_path_str}_{schema_field}") current_alias = None current_alias_str = None + current_alias_sql_schema = None if current_relation is None and schema["type"] in ("array", "object"): if db_field is None: @@ -165,7 +174,10 @@ def collect_resolve_join_tables( else: # object # will be used to call either json_to_record(...) or jsonb_to_record(...): relation_sql_template = "{structure_type}_to_record({field})" - current_alias, current_alias_str = json_schema_to_postgres_schema(new_aliased_resolve_path, schema) + current_alias, current_alias_str, current_alias_sql_schema = json_schema_to_postgres_schema( + new_aliased_resolve_path, schema) + import sys + print(current_alias, current_alias_str, current_alias_sql_schema, file=sys.stderr) current_relation = sql.SQL(relation_sql_template).format( structure_type=sql.SQL(structure_type), # json or jsonb here @@ -224,6 +236,7 @@ def collect_resolve_join_tables( relations=relations, aliases=aliases, current_alias_str=current_alias_str, + current_alias_sql_schema=current_alias_sql_schema, key_link=key_link, field_alias=db_field, search_properties=search_properties, @@ -306,14 +319,19 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable: # (e.g., nested objects stored in their own relations). # If there is just 1 entry in terms, no join will occur, and it'll just be set to its alias. sql.SQL(" LEFT JOIN ").join(( - sql.SQL("{r1} AS {a1}").format(r1=terms[0].relations.current, a1=terms[0].aliases.current), + sql.SQL("{r1} AS {a1}{s1}").format( + r1=terms[0].relations.current, + a1=terms[0].aliases.current, + s1=terms[0].current_alias_sql_schema or SQL_NOTHING, + ), *( - sql.SQL("{r1} AS {a1} ON {a0}.{f0} = {a1}.{f1}").format( + sql.SQL("{r1} AS {a1}{s1} ON {a0}.{f0} = {a1}.{f1}").format( r1=term.relations.current, a0=term.aliases.parent, a1=term.aliases.current, f0=sql.Identifier(term.key_link[0]), - f1=sql.Identifier(term.key_link[1]) + f1=sql.Identifier(term.key_link[1]), + s1=term.current_alias_sql_schema or SQL_NOTHING, ) for term in terms[1:] if term.key_link is not None @@ -322,7 +340,11 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable: # Then, include any additional (non-terms[0]) non-joined relations. *( - sql.SQL("{r1} AS {a1}").format(r1=term.relations.current, a1=term.aliases.current) + sql.SQL("{r1} AS {a1}{s1}").format( + r1=term.relations.current, + a1=term.aliases.current, + s1=term.current_alias_sql_schema or SQL_NOTHING, + ) for term in terms[1:] if term.key_link is None and term.relations.current is not None ), diff --git a/tests/test_search.py b/tests/test_search.py index 6954bc5..fe11412 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -1,3 +1,4 @@ +import psycopg2.sql from bento_lib.search import build_search_response, data_structure, operations, postgres, queries from datetime import datetime from pytest import mark, raises @@ -142,6 +143,23 @@ "operations": [operations.SEARCH_OP_EQ], "queryable": "all" } + }, + "test2": { + "type": "object", + "properties": { + "test": { + "type": "string", + "search": { + "operations": [operations.SEARCH_OP_EQ], + "queryable": "all" + } + } + }, + "search": { + "database": { + "type": "json" + } + } } }, "search": { @@ -643,6 +661,11 @@ {"query": ["#ilike", ["#resolve", "biosamples", "[item]", "procedure", "code", "label"], "[%duM\\%\\_my]"], # 40 "ds": (False, False, 2, 0), "ps": (False, ("[%duM\\%\\_my]",))}, + + # Testing nested Postgres JSON schema-creation + {"query": ["#eq", ["#resolve", "biosamples", "[item]", "test_postgres_array", "[item]", "test2", "test"], "a"], + "ds": (False, True, 2, 2), # Accessing 2 biosamples, each with 1 test_postgres_array item + "ps": (False, ("a",))}, ] TEST_LARGE_QUERY_1 = [ @@ -717,7 +740,7 @@ { "procedure": {"code": {"id": "TEST", "label": "TEST LABEL"}}, "tumor_grade": [{"id": "TG1", "label": "TG1 LABEL"}, {"id": "TG2", "label": "TG2 LABEL"}], - "test_postgres_array": [{"test": "test_value"}], + "test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}], "test_json_array": [{"test": "test_value"}], }, { @@ -727,7 +750,7 @@ {"id": "TG4", "label": "TG4 LABEL"}, {"id": "TG5", "label": "TG5 LABEL"}, ], - "test_postgres_array": [{"test": "test_value"}], + "test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}], "test_json_array": [{"test": "test_value"}], } ], @@ -888,15 +911,27 @@ def test_queries_and_ast(): def test_postgres_schemas(): null_schema = postgres.json_schema_to_postgres_schema("test", {"type": "integer"}) - assert null_schema[0] is None and null_schema[1] is None + assert null_schema[0] is None and null_schema[1] is None and null_schema[2] is None for s, p in zip(JSON_SCHEMA_TYPES, POSTGRES_TYPES): - assert postgres.json_schema_to_postgres_schema("test", { + res = postgres.json_schema_to_postgres_schema("test", { "type": "object", "properties": { "test2": {"type": s} } - })[1] == f"test(test2 {p})" + }) + assert res[1] == "test" + assert res[2] == psycopg2.sql.Composed([ + psycopg2.sql.SQL("("), + psycopg2.sql.Composed([ + psycopg2.sql.Composed([ + psycopg2.sql.Identifier("test2"), + psycopg2.sql.SQL(" "), + psycopg2.sql.SQL(p) + ]) + ]), + psycopg2.sql.SQL(")"), + ]) def test_postgres_collect_resolve_join_tables(): @@ -922,7 +957,8 @@ def test_postgres_invalid_schemas(): def test_postgres_valid_queries(query): e = query["query"] i, p = query["ps"] - _, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i) + sql, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i) + print("Generated SQL", sql) assert params == p From d5b5a32a036913c7a605c2e46aad2691e610e7d9 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 28 Nov 2023 13:41:25 -0500 Subject: [PATCH 2/3] lint: rm debug logging --- bento_lib/search/postgres.py | 2 -- tests/test_search.py | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/bento_lib/search/postgres.py b/bento_lib/search/postgres.py index 2f79434..de53db5 100644 --- a/bento_lib/search/postgres.py +++ b/bento_lib/search/postgres.py @@ -176,8 +176,6 @@ def collect_resolve_join_tables( relation_sql_template = "{structure_type}_to_record({field})" current_alias, current_alias_str, current_alias_sql_schema = json_schema_to_postgres_schema( new_aliased_resolve_path, schema) - import sys - print(current_alias, current_alias_str, current_alias_sql_schema, file=sys.stderr) current_relation = sql.SQL(relation_sql_template).format( structure_type=sql.SQL(structure_type), # json or jsonb here diff --git a/tests/test_search.py b/tests/test_search.py index fe11412..582725e 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -957,8 +957,7 @@ def test_postgres_invalid_schemas(): def test_postgres_valid_queries(query): e = query["query"] i, p = query["ps"] - sql, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i) - print("Generated SQL", sql) + _, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i) assert params == p From 4cb82a163d2af869ad72bdd5bd0e01a0a89ff66f Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Tue, 28 Nov 2023 13:42:04 -0500 Subject: [PATCH 3/3] chore: set version to 10.1.1a1 --- bento_lib/package.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bento_lib/package.cfg b/bento_lib/package.cfg index 7699eb1..f95f4f2 100644 --- a/bento_lib/package.cfg +++ b/bento_lib/package.cfg @@ -1,5 +1,5 @@ [package] name = bento_lib -version = 10.1.1 +version = 10.1.1a1 authors = David Lougheed, Paul Pillot author_emails = david.lougheed@mail.mcgill.ca, paul.pillot@computationalgenomics.ca