Skip to content

Commit

Permalink
Added support for anyOf in loadxl (and schema_utils).
Browse files Browse the repository at this point in the history
  • Loading branch information
dmichaels-harvard committed Oct 19, 2023
1 parent 2c10fc8 commit c658444
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 7 deletions.
21 changes: 14 additions & 7 deletions snovault/loadxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dcicutils.misc_utils import ignored, environ_bool, VirtualApp
from dcicutils.secrets_utils import assume_identity
from snovault.util import debug_log
from .schema_utils import get_identifying_and_required_properties
from .project_app import app_project
from .server_defaults_misc import add_last_modified

Expand Down Expand Up @@ -432,14 +433,20 @@ def load_all_gen(testapp, inserts, docsdir, overwrite=True, itype=None, from_jso
# collect schemas
profiles = testapp.get('/profiles/?frame=raw').json

def get_schema_info(obj_type: str) -> (list, list):
def get_camel_case_version_of_type_name(snake_case_version_of_type_name: str) -> str:
def get_schema_info(type_name: str) -> (list, list):
"""
Returns a tuple containing (first) the list of identifying properties and (second) the list
of any required properties specified by the schema associated with the object of the given
object type name. The schema is ASSUMED to be contained within the outer profiles dictionary
variable, keyed by the camel-case version of the given object type name, which itself is
assumed to be the snake-case version of the type name (though okay if already camel-case).
See get_identifying_and_required_properties for details of how these fields are extracted.
"""
def get_camel_case_version_of_type_name(type_name: str) -> str:
# This conversion of schema name to object type works for all existing schemas at the moment.
return "".join([part.title() for part in snake_case_version_of_type_name.split('_')])
schema = profiles[get_camel_case_version_of_type_name(obj_type)]
identifying_properties = schema.get("identifyingProperties", [])
required_properties = schema.get("required", [])
return (identifying_properties, required_properties)
return "".join([part.title() for part in type_name.split("_")])
schema = profiles[get_camel_case_version_of_type_name(type_name)]
return get_identifying_and_required_properties(schema)

# run step1 - if item does not exist, post with minimal metadata (and skip indexing since we will patch
# in round 2)
Expand Down
55 changes: 55 additions & 0 deletions snovault/schema_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,3 +617,58 @@ def userid(instance, subschema): # args required by jsonschema-serialize-fork
def now(instance, subschema): # args required by jsonschema-serialize-fork
ignored(instance, subschema)
return utc_now_str()


def get_identifying_and_required_properties(schema: dict) -> (list, list):
    """
    Returns a tuple containing (first) the list of identifying properties
    and (second) the list of any required properties specified by the given schema.

    This DOES handle a limited version of the "anyOf" construct; namely where it only contains
    a simple list of objects each specifying a "required" property name or a list of property
    names; in this case ALL such "required" property names are included; an EXCEPTION is
    raised if an unsupported usage of this "anyOf" construct is found.

    This may be slightly confusing in that ALL of the properties specified within an "anyOf"
    construct are returned from this function as required, which is technically not semantically
    correct; only ONE of those would be required; but this function is NOT used for validation,
    but instead to extract from the actual object the values which must be included on the initial
    insert into the database, when it is FIRST created, via POST in loadxl.

    The returned required properties list contains no duplicates and is in a deterministic
    order: first the top-level "required" names (in schema order), then any additional names
    from "anyOf" (in order of first appearance).

    :param schema: The JSON schema, as a dictionary.
    :returns: A tuple of (identifying property names, required property names).
    :raises ValueError: If the "anyOf" construct is used in an unsupported way.
    """
    def get_all_required_properties_from_any_of(schema: dict) -> list:
        """
        Returns a list of ALL property names which are specified as "required" within any "anyOf"
        construct within the given JSON schema. We support ONLY a LIMITED version of "anyOf" construct,
        in which it must contain ONLY a simple list of objects each specifying a "required" property
        name or list of property names; if the "anyOf" construct looks like it is anything OTHER
        than this limited usage, then an EXCEPTION will be raised.
        """
        any_of_list = schema.get("anyOf")
        if not any_of_list:
            # Always return a list (never a set) so the return type is consistent.
            return []
        if not isinstance(any_of_list, list):
            raise ValueError("Unsupported use of anyOf in schema.")
        required_properties = []
        for any_of in any_of_list:
            if not any_of:
                # Tolerate empty/null entries within the anyOf list.
                continue
            if not isinstance(any_of, dict):
                raise ValueError("Unsupported use of anyOf in schema.")
            for any_of_key, any_of_value in any_of.items():
                if any_of_key != "required":
                    raise ValueError("Unsupported use of anyOf in schema.")
                if not any_of_value:
                    continue
                if isinstance(any_of_value, list):
                    required_properties.extend(any_of_value)
                elif isinstance(any_of_value, str):
                    required_properties.append(any_of_value)
                else:
                    raise ValueError("Unsupported use of anyOf in schema.")
        # dict.fromkeys removes duplicates while preserving first-seen order.
        return list(dict.fromkeys(required_properties))

    identifying_properties = schema.get("identifyingProperties", [])
    # Treat a missing or null "required" as empty, and a lone string as a single
    # property name (rather than letting it be split into individual characters).
    required = schema.get("required") or []
    if isinstance(required, str):
        required = [required]
    required_properties = list(dict.fromkeys(
        [*required, *get_all_required_properties_from_any_of(schema)]
    ))
    return identifying_properties, required_properties
46 changes: 46 additions & 0 deletions snovault/tests/test_schema_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,49 @@ def test_schema_utils_merge_regex_matches(ref):
def test_schema_utils_merge_regex_no_match(ref):
""" Positive test for testing the merge regex match """
assert not match_merge_syntax(ref)


def test_get_identifying_and_required_properties():
    """
    Exercise get_identifying_and_required_properties against a real schema file,
    against schemas using the supported (limited) form of "anyOf", and against
    an unsupported "anyOf" usage which must raise an exception.
    """
    from snovault.schema_utils import get_identifying_and_required_properties, load_schema

    # A schema loaded from file which has no required properties.
    schema = load_schema("snovault:schemas/access_key.json")
    identifying_properties, required_properties = get_identifying_and_required_properties(schema)
    assert identifying_properties == ["uuid"]
    assert required_properties == []

    # An empty schema yields no identifying and no required properties.
    identifying_properties, required_properties = get_identifying_and_required_properties({})
    assert identifying_properties == []
    assert required_properties == []

    # Top-level "required" combined with "anyOf" entries given as lists.
    schema = {
        "identifyingProperties": ["uuid", "another_id"],
        "required": ["some_required_property_a", "some_required_property_b"],
        "anyOf": [
            {"required": ["either_require_this_property_a"]},
            {"required": ["or_require_this_property_a"]},
        ],
    }
    identifying_properties, required_properties = get_identifying_and_required_properties(schema)
    assert set(identifying_properties) == {"another_id", "uuid"}
    assert set(required_properties) == {
        "some_required_property_a",
        "some_required_property_b",
        "either_require_this_property_a",
        "or_require_this_property_a",
    }

    # "anyOf" entries given as plain strings rather than lists.
    schema = {
        "identifyingProperties": ["uuid", "another_id"],
        "anyOf": [
            {"required": "either_require_this_property_a"},
            {"required": "or_require_this_property_a"},
        ],
    }
    identifying_properties, required_properties = get_identifying_and_required_properties(schema)
    assert set(identifying_properties) == {"another_id", "uuid"}
    assert set(required_properties) == {"either_require_this_property_a", "or_require_this_property_a"}

    # An "anyOf" entry with any key other than "required" is unsupported and must raise.
    schema = {
        "required": ["some_required_property_a", "some_required_property_b"],
        "anyOf": [
            {"unexpected": "dummy"},
            {"required": "either_require_this_property_a"},
            {"required": "or_require_this_property_a"},
        ],
    }
    # Only the raising call belongs inside the context manager; anything after
    # it within the block would never execute.
    with pytest.raises(Exception):
        get_identifying_and_required_properties(schema)

0 comments on commit c658444

Please sign in to comment.