fix(search): correctly handle nested Postgres JSON querying
davidlougheed committed Nov 28, 2023
1 parent cabb1d7 commit b03f71c
Showing 3 changed files with 78 additions and 20 deletions.
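Postgres' json_to_record() and jsonb_to_record() functions return an anonymous record, so any query selecting from them must attach a column definition list to the alias (AS alias(col type, ...)); otherwise Postgres rejects the query with "a column definition list is required for functions returning \"record\"". This commit splits that column definition list out of the alias composable returned by json_schema_to_postgres_schema() so that join_fragment() can append it after the alias wherever the relation is joined. A minimal standalone sketch of the underlying Postgres behaviour (the DSN and data are illustrative, not part of this repository):

import psycopg2

# Illustration only, assuming a reachable database; not code from this commit.
conn = psycopg2.connect("dbname=test")  # assumed DSN
with conn, conn.cursor() as cur:
    # jsonb_to_record() needs the "AS alias(col type, ...)" column definition list:
    cur.execute(
        "SELECT x.test, x.test2"
        " FROM jsonb_to_record('{\"test\": \"a\", \"test2\": {\"test\": \"b\"}}'::jsonb)"
        " AS x(test TEXT, test2 JSONB)"
    )
    print(cur.fetchone())  # -> ('a', {'test': 'b'})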
2 changes: 1 addition & 1 deletion bento_lib/package.cfg
@@ -1,5 +1,5 @@
[package]
name = bento_lib
-version = 10.1.0
+version = 10.1.1
authors = David Lougheed, Paul Pillot
author_emails = [email protected], [email protected]
48 changes: 35 additions & 13 deletions bento_lib/search/postgres.py
@@ -20,6 +20,7 @@

QUERY_ROOT = q.Literal("$root")
SQL_ROOT = sql.Identifier("_root")
+SQL_NOTHING = sql.SQL("")


# TODO: Python 3.7: Data Class
@@ -41,6 +42,7 @@ def __init__(
relations: OptionalComposablePair,
aliases: OptionalComposablePair,
current_alias_str: Optional[str],
+current_alias_sql_schema: Optional[sql.SQL],
key_link: Optional[Tuple[str, str]],
field_alias: Optional[str],
search_properties: dict,
@@ -49,6 +51,7 @@
self.relations: OptionalComposablePair = relations
self.aliases: OptionalComposablePair = aliases
self.current_alias_str: Optional[str] = current_alias_str
+self.current_alias_sql_schema: Optional[sql.SQL] = current_alias_sql_schema
self.key_link: Optional[Tuple[str, str]] = key_link
self.field_alias: Optional[str] = field_alias
self.search_properties: dict = search_properties
@@ -80,23 +83,28 @@ def json_schema_to_postgres_type(schema: JSONSchema) -> str:
return "TEXT" # TODO


-def json_schema_to_postgres_schema(name: str, schema: JSONSchema) -> Tuple[Optional[sql.Composable], Optional[str]]:
+def json_schema_to_postgres_schema(
+name: str,
+schema: JSONSchema,
+) -> Tuple[Optional[sql.Composable], Optional[str], Optional[sql.Composable]]:
"""
Maps a JSON object schema to a Postgres schema for on-the-fly mapping.
:param name: the name to give the fake table.
:param schema: JSON schema to map.
"""

if schema["type"] != "object":
-return None, None
+return None, None, None

return (
sql.SQL("{}({})").format(
sql.Identifier(name),
sql.SQL(", ").join(sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s)))
for p, s in schema["properties"].items())),
"{}({})".format(name, ", ".join("{} {}".format(p, json_schema_to_postgres_type(s))
for p, s in schema["properties"].items()))
sql.Identifier(name),
name,
sql.SQL("({})").format(
sql.SQL(", ").join(
sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s)))
for p, s in schema["properties"].items()
)
),
)
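# A rough sketch of the new return shape (illustrative, not part of the diff): for an
# object schema, the helper now yields the alias identifier, its plain-string form, and
# the "(col type, ...)" column definition list as three separate values, e.g.
#
#   alias, alias_str, alias_schema = json_schema_to_postgres_schema(
#       "test2", {"type": "object", "properties": {"test": {"type": "string"}}})
#   # alias        -> sql.Identifier("test2")
#   # alias_str    -> "test2"
#   # alias_schema -> a composable rendering roughly as ("test" TEXT)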


@@ -145,6 +153,7 @@ def collect_resolve_join_tables(
new_aliased_resolve_path = re.sub(r"[$\[\]]+", "", f"{aliased_resolve_path_str}_{schema_field}")
current_alias = None
current_alias_str = None
+current_alias_sql_schema = None

if current_relation is None and schema["type"] in ("array", "object"):
if db_field is None:
@@ -165,7 +174,10 @@
else: # object
# will be used to call either json_to_record(...) or jsonb_to_record(...):
relation_sql_template = "{structure_type}_to_record({field})"
-current_alias, current_alias_str = json_schema_to_postgres_schema(new_aliased_resolve_path, schema)
+current_alias, current_alias_str, current_alias_sql_schema = json_schema_to_postgres_schema(
+new_aliased_resolve_path, schema)
+import sys
+print(current_alias, current_alias_str, current_alias_sql_schema, file=sys.stderr)

current_relation = sql.SQL(relation_sql_template).format(
structure_type=sql.SQL(structure_type), # json or jsonb here
@@ -224,6 +236,7 @@ def collect_resolve_join_tables(
relations=relations,
aliases=aliases,
current_alias_str=current_alias_str,
+current_alias_sql_schema=current_alias_sql_schema,
key_link=key_link,
field_alias=db_field,
search_properties=search_properties,
@@ -306,14 +319,19 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable:
# (e.g., nested objects stored in their own relations).
# If there is just 1 entry in terms, no join will occur, and it'll just be set to its alias.
sql.SQL(" LEFT JOIN ").join((
sql.SQL("{r1} AS {a1}").format(r1=terms[0].relations.current, a1=terms[0].aliases.current),
sql.SQL("{r1} AS {a1}{s1}").format(
r1=terms[0].relations.current,
a1=terms[0].aliases.current,
s1=terms[0].current_alias_sql_schema or SQL_NOTHING,
),
*(
sql.SQL("{r1} AS {a1} ON {a0}.{f0} = {a1}.{f1}").format(
sql.SQL("{r1} AS {a1}{s1} ON {a0}.{f0} = {a1}.{f1}").format(
r1=term.relations.current,
a0=term.aliases.parent,
a1=term.aliases.current,
f0=sql.Identifier(term.key_link[0]),
-f1=sql.Identifier(term.key_link[1])
+f1=sql.Identifier(term.key_link[1]),
+s1=term.current_alias_sql_schema or SQL_NOTHING,
)
for term in terms[1:]
if term.key_link is not None
@@ -322,7 +340,11 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable:

# Then, include any additional (non-terms[0]) non-joined relations.
*(
sql.SQL("{r1} AS {a1}").format(r1=term.relations.current, a1=term.aliases.current)
sql.SQL("{r1} AS {a1}{s1}").format(
r1=term.relations.current,
a1=term.aliases.current,
s1=term.current_alias_sql_schema or SQL_NOTHING,
)
for term in terms[1:]
if term.key_link is None and term.relations.current is not None
),
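Taken together, each joined term produced from a nested JSON object now carries its column definition list directly after its alias. A minimal sketch of one such fragment built with psycopg2.sql, using made-up alias and field names (the library derives the real ones from the resolve path and the JSON schema):

from psycopg2 import sql

# Sketch under assumed names; not output copied from bento_lib.
fragment = sql.SQL("{r1} AS {a1}{s1}").format(
    r1=sql.SQL("jsonb_to_record({})").format(
        sql.SQL("{}.{}").format(sql.Identifier("parent_alias"), sql.Identifier("test2"))),
    a1=sql.Identifier("parent_alias_test2"),
    s1=sql.SQL('("test" TEXT)'),
)
# Rendered, this reads roughly:
#   jsonb_to_record("parent_alias"."test2") AS "parent_alias_test2"("test" TEXT)
# Without the trailing "(...)" list, Postgres fails with:
#   ERROR:  a column definition list is required for functions returning "record"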
48 changes: 42 additions & 6 deletions tests/test_search.py
@@ -1,3 +1,4 @@
+import psycopg2.sql
from bento_lib.search import build_search_response, data_structure, operations, postgres, queries
from datetime import datetime
from pytest import mark, raises
@@ -142,6 +143,23 @@
"operations": [operations.SEARCH_OP_EQ],
"queryable": "all"
}
+},
+"test2": {
+"type": "object",
+"properties": {
+"test": {
+"type": "string",
+"search": {
+"operations": [operations.SEARCH_OP_EQ],
+"queryable": "all"
+}
+}
+},
+"search": {
+"database": {
+"type": "json"
+}
+}
}
},
"search": {
@@ -643,6 +661,11 @@
{"query": ["#ilike", ["#resolve", "biosamples", "[item]", "procedure", "code", "label"], "[%duM\\%\\_my]"], # 40
"ds": (False, False, 2, 0),
"ps": (False, ("[%duM\\%\\_my]",))},

+# Testing nested Postgres JSON schema-creation
+{"query": ["#eq", ["#resolve", "biosamples", "[item]", "test_postgres_array", "[item]", "test2", "test"], "a"],
+"ds": (False, True, 2, 2),  # Accessing 2 biosamples, each with 1 test_postgres_array item
+"ps": (False, ("a",))},
]

TEST_LARGE_QUERY_1 = [
@@ -717,7 +740,7 @@
{
"procedure": {"code": {"id": "TEST", "label": "TEST LABEL"}},
"tumor_grade": [{"id": "TG1", "label": "TG1 LABEL"}, {"id": "TG2", "label": "TG2 LABEL"}],
"test_postgres_array": [{"test": "test_value"}],
"test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}],
"test_json_array": [{"test": "test_value"}],
},
{
@@ -727,7 +750,7 @@
{"id": "TG4", "label": "TG4 LABEL"},
{"id": "TG5", "label": "TG5 LABEL"},
],
"test_postgres_array": [{"test": "test_value"}],
"test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}],
"test_json_array": [{"test": "test_value"}],
}
],
@@ -888,15 +911,27 @@ def test_queries_and_ast():

def test_postgres_schemas():
null_schema = postgres.json_schema_to_postgres_schema("test", {"type": "integer"})
-assert null_schema[0] is None and null_schema[1] is None
+assert null_schema[0] is None and null_schema[1] is None and null_schema[2] is None

for s, p in zip(JSON_SCHEMA_TYPES, POSTGRES_TYPES):
assert postgres.json_schema_to_postgres_schema("test", {
res = postgres.json_schema_to_postgres_schema("test", {
"type": "object",
"properties": {
"test2": {"type": s}
}
})[1] == f"test(test2 {p})"
})
assert res[1] == "test"
assert res[2] == psycopg2.sql.Composed([
psycopg2.sql.SQL("("),
psycopg2.sql.Composed([
psycopg2.sql.Composed([
psycopg2.sql.Identifier("test2"),
psycopg2.sql.SQL(" "),
psycopg2.sql.SQL(p)
])
]),
psycopg2.sql.SQL(")"),
])
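# For reference (rendering assumed, not asserted by the test): given a live psycopg2
# connection `conn`, this composed column definition list flattens to a plain string,
# e.g. res[2].as_string(conn) -> '("test2" TEXT)' when p == "TEXT".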


def test_postgres_collect_resolve_join_tables():
@@ -922,7 +957,8 @@ def test_postgres_invalid_schemas():
def test_postgres_valid_queries(query):
e = query["query"]
i, p = query["ps"]
-_, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i)
+sql, params = postgres.search_query_to_psycopg2_sql(e, TEST_SCHEMA, i)
+print("Generated SQL", sql)
assert params == p

