From c65844415fcf1110b530ad963000f03431630385 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Thu, 19 Oct 2023 18:52:54 -0400 Subject: [PATCH] Added support for anyOf in loadxl (and schema_utils). --- snovault/loadxl.py | 21 +++++++---- snovault/schema_utils.py | 55 +++++++++++++++++++++++++++++ snovault/tests/test_schema_utils.py | 46 ++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 7 deletions(-) diff --git a/snovault/loadxl.py b/snovault/loadxl.py index 7b8585db6..d808bc68e 100644 --- a/snovault/loadxl.py +++ b/snovault/loadxl.py @@ -22,6 +22,7 @@ from dcicutils.misc_utils import ignored, environ_bool, VirtualApp from dcicutils.secrets_utils import assume_identity from snovault.util import debug_log +from .schema_utils import get_identifying_and_required_properties from .project_app import app_project from .server_defaults_misc import add_last_modified @@ -432,14 +433,20 @@ def load_all_gen(testapp, inserts, docsdir, overwrite=True, itype=None, from_jso # collect schemas profiles = testapp.get('/profiles/?frame=raw').json - def get_schema_info(obj_type: str) -> (list, list): - def get_camel_case_version_of_type_name(snake_case_version_of_type_name: str) -> str: + def get_schema_info(type_name: str) -> (list, list): + """ + Returns a tuple containing (first) the list of identifying properties and (second) the list + of any required properties specified by the schema associated with the object of the given + object type name. The schema is ASSUMED to be contained within the outer profiles dictionary + variable, keyed by the camel-case version of the given object type name, which itself is + assumed to be the snake-case version of the type name (though okay if already camel-case). + See get_identifying_and_required_properties for details of how these fields are extracted. + """ + def get_camel_case_version_of_type_name(type_name: str) -> str: # This conversion of schema name to object type works for all existing schemas at the moment. 
- return "".join([part.title() for part in snake_case_version_of_type_name.split('_')]) - schema = profiles[get_camel_case_version_of_type_name(obj_type)] - identifying_properties = schema.get("identifyingProperties", []) - required_properties = schema.get("required", []) - return (identifying_properties, required_properties) + return "".join([part.title() for part in type_name.split("_")]) + schema = profiles[get_camel_case_version_of_type_name(type_name)] + return get_identifying_and_required_properties(schema) # run step1 - if item does not exist, post with minimal metadata (and skip indexing since we will patch # in round 2) diff --git a/snovault/schema_utils.py b/snovault/schema_utils.py index fb30fbb04..16b055d77 100644 --- a/snovault/schema_utils.py +++ b/snovault/schema_utils.py @@ -617,3 +617,58 @@ def userid(instance, subschema): # args required by jsonschema-serialize-fork def now(instance, subschema): # args required by jsonschema-serialize-fork ignored(instance, subschema) return utc_now_str() + + +def get_identifying_and_required_properties(schema: dict) -> (list, list): + """ + Returns a tuple containing (first) the list of identifying properties + and (second) the list of any required properties specified by the given schema. + + This DOES handle a limited version of the "anyOf" construct; namely where it only contains + a simple list of objects each specifying a "required" property name or a list of property + names; in this call ALL such "required" property names are included; an EXCEPTION is + raised if an unsupported usage of this "anyOf" construct is found. 
+ + This may be slightly confusing in that ALL of the properties specified within an "anyOf" + construct are returned from this function as required, which is not technically semantically + correct; only ONE of those would be required; but this function is NOT used for validation, + but instead to extract from the actual object the values which must be included on the initial + insert into the database, when it is FIRST created, via POST in loadxl. + """ + def get_all_required_properties_from_any_of(schema: dict) -> list: + """ + Returns a list of ALL property names which are specified as "required" within any "anyOf" + construct within the given JSON schema. We support ONLY a LIMITED version of "anyOf" construct, + in which it must contain ONLY a simple list of objects each specifying a "required" property + name or list of property names; if the "anyOf" construct looks like it is anything OTHER + than this limited usage, then an EXCEPTION will be raised. + """ + required_properties = set() + any_of_list = schema.get("anyOf") + if not any_of_list: + return required_properties + if not isinstance(any_of_list, list): + raise Exception("Unsupported use of anyOf in schema.") + for any_of in any_of_list: + if not any_of: + continue + if not isinstance(any_of, dict): + raise Exception("Unsupported use of anyOf in schema.") + for any_of_key, any_of_value in any_of.items(): + if any_of_key != "required": + raise Exception("Unsupported use of anyOf in schema.") + if not any_of_value: + continue + if isinstance(any_of_value, list): + required_properties.update(any_of_value) + elif isinstance(any_of_value, str): + required_properties.add(any_of_value) + else: + raise Exception("Unsupported use of anyOf in schema.") + return list(required_properties) + + identifying_properties = schema.get("identifyingProperties", []) + required_properties = set() + required_properties.update(schema.get("required", [])) + 
required_properties.update(get_all_required_properties_from_any_of(schema)) + return identifying_properties, list(required_properties) diff --git a/snovault/tests/test_schema_utils.py b/snovault/tests/test_schema_utils.py index 08b34cbae..046bffbb2 100644 --- a/snovault/tests/test_schema_utils.py +++ b/snovault/tests/test_schema_utils.py @@ -448,3 +448,49 @@ def test_schema_utils_merge_regex_matches(ref): def test_schema_utils_merge_regex_no_match(ref): """ Positive test for testing the merge regex match """ assert not match_merge_syntax(ref) + + +def test_get_identifying_and_required_properties(): + + from snovault.schema_utils import get_identifying_and_required_properties, load_schema + + schema = load_schema("snovault:schemas/access_key.json") + identifying_properties, required_properties = get_identifying_and_required_properties(schema) + assert identifying_properties == ["uuid"] + assert required_properties == [] + + schema = { + "identifyingProperties": ["uuid", "another_id"], + "required": ["some_required_property_a", "some_required_property_b"], + "anyOf": [ + {"required": ["either_require_this_property_a"]}, + {"required": ["or_require_this_property_a"]} + ] + } + identifying_properties, required_properties = get_identifying_and_required_properties(schema) + assert set(identifying_properties) == {"another_id", "uuid"} + assert set(required_properties) == {"some_required_property_a", "some_required_property_b", "either_require_this_property_a", "or_require_this_property_a"} + + schema = { + "identifyingProperties": ["uuid", "another_id"], + "anyOf": [ + {"required": "either_require_this_property_a"}, + {"required": "or_require_this_property_a"} + ] + } + identifying_properties, required_properties = get_identifying_and_required_properties(schema) + assert set(identifying_properties) == {"another_id", "uuid"} + assert set(required_properties) == {"either_require_this_property_a", "or_require_this_property_a"} + + with pytest.raises(Exception): + schema = { 
+ "required": ["some_required_property_a", "some_required_property_b"], + "anyOf": [ + {"unexpected": "dummy"}, + {"required": "either_require_this_property_a"}, + {"required": "or_require_this_property_a"} + ] + } + identifying_properties, required_properties = get_identifying_and_required_properties(schema) + assert set(identifying_properties) == {} + assert set(required_properties) == {"some_required_property_a", "some_required_property_b", "either_require_this_property_a", "or_require_this_property_a"}