diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 248887343..c50a6d38a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,14 @@ dcicutils Change Log ---------- +8.2.0 +===== +* 2023-11-02 +* Added ``SchemaManager.get_identifying_properties`` in ``validation_utils`` + which implicitly adds ``identifier`` to ``identifyingProperties``. +* Added support for ``portal_vapp`` to ``ff_utils.get_metadata``. + + 8.1.0 ===== diff --git a/dcicutils/bundle_utils.py b/dcicutils/bundle_utils.py index 2c6304e8b..fb76c74a5 100644 --- a/dcicutils/bundle_utils.py +++ b/dcicutils/bundle_utils.py @@ -4,14 +4,13 @@ from .common import AnyJsonData from .env_utils import EnvUtils, public_env_name from .ff_utils import get_metadata -from .lang_utils import there_are from .misc_utils import AbstractVirtualApp, ignored, ignorable, PRINT, to_camel_case from .sheet_utils import ( LoadTableError, prefer_number, TabbedJsonSchemas, Header, Headers, TabbedHeaders, ParsedHeader, ParsedHeaders, TabbedParsedHeaders, SheetCellValue, TabbedSheetData, TableSetManagerRegistry, AbstractTableSetManager, InsertsManager, TableSetManager, load_table_set, ) -from .validation_utils import SchemaManager, validate_data_against_schemas, summary_of_data_validation_errors +from .validation_utils import SchemaManager, validate_data_against_schemas PatchPrototype = Dict @@ -40,7 +39,8 @@ def __str__(self): class ValidationProblem(Exception): - pass + def __init__(self, problems: Optional[dict] = None): + self.problems = problems class TypeHint: @@ -506,7 +506,8 @@ def raise_any_pending_problems(self): if problems: for problem in problems: PRINT(f"Problem: {problem}") - raise Exception(there_are(problems, kind='problem while compiling hints', tense='past', show=False)) + raise ValidationProblem(problems) + # raise Exception(there_are(problems, kind='problem while compiling hints', tense='past', show=False)) def check_tabs(self): result = {tab_name: self.check_tab(tab_name) @@ -522,7 +523,8 @@ def validate_ref(self, 
item_type, item_ref): return True try: # TODO: This probably needs a cache - info = get_metadata(f"/{to_camel_case(item_type)}/{item_ref}") + info = get_metadata(f"/{to_camel_case(item_type)}/{item_ref}", + ff_env=self.portal_env, vapp=self.portal_vapp) # Basically return True if there's a value at all, # but still check it's not an error message that didn't get raised. return isinstance(info, dict) and 'uuid' in info @@ -653,18 +655,8 @@ def load_items(filename: str, tab_name: Optional[str] = None, escaping: Optional # No fancy checking for things like .json, etc. for now. Only check things that came from # spreadsheet-like data, where structural datatypes are forced into strings. checked_items = tabbed_rows - if validate: problems = validate_data_against_schemas(checked_items, portal_env=portal_env, portal_vapp=portal_vapp, override_schemas=override_schemas) - error_summary = summary_of_data_validation_errors(problems) - if error_summary: - for item in error_summary: - PRINT(item) - raise Exception("Validation problems were seen.") - # TODO: Maybe connect validation here. Although another option is to just call validation separately - # once this is successfully loaded. Needs thought. However, David's validation_utils can do - # the validation if we decide to do it, it would just need to be connected up. - # -kmp 23-Oct-2023 - raise NotImplementedError("Need to implement validation.") + return checked_items, problems return checked_items diff --git a/dcicutils/ff_utils.py b/dcicutils/ff_utils.py index 6f011bea4..e2c6ac089 100644 --- a/dcicutils/ff_utils.py +++ b/dcicutils/ff_utils.py @@ -277,7 +277,7 @@ def _sls(val): return val.lstrip('/') -def get_metadata(obj_id, key=None, ff_env=None, check_queue=False, add_on=''): +def get_metadata(obj_id, key=None, ff_env=None, check_queue=False, add_on='', vapp: Optional[VirtualApp] = None): """ Function to get metadata for a given obj_id (uuid or @id, most likely). 
Either takes a dictionary form authentication (MUST include 'server') @@ -290,6 +290,13 @@ def get_metadata(obj_id, key=None, ff_env=None, check_queue=False, add_on=''): "frame=object&force_md5" *REQUIRES ff_env if check_queue is used* """ + if vapp: + url = f"/{obj_id}?{add_on}" + response = vapp.get(url) + if response and response.status_code in [301, 302, 303, 307, 308]: + response = response.follow() + return get_response_json(response) + auth = get_authentication_with_server(key, ff_env) if check_queue and stuff_in_queues(ff_env, check_secondary=False): add_on += '&datastore=database' @@ -989,6 +996,12 @@ def get_schema(name, key=None, ff_env: Optional[str] = None, portal_env: Optiona portal_env = resolve_portal_env(ff_env=ff_env, portal_env=portal_env, portal_vapp=portal_vapp) base_url = f"profiles/{to_camel_case(name)}.json" add_on = 'frame=raw' + + # TODO + # Now that get_metadata supported portal_vapp we can do: + # return get_metadata(obj_id=base_url, key=key, ff_env=portal_env, add_on=add_on, vapp=portal_vapp) + # however this breaks test_ff_utils.test_get_schema_with_vapp and no time to fix. 2023-11-02. 
@staticmethod
def get_identifying_properties(schema: dict) -> list:
    """
    Return the names of the identifying properties declared by a schema.

    The result is the schema's ``identifyingProperties`` list, with
    ``identifier`` implicitly appended when the schema declares a property of
    that name and it is not already listed.

    :param schema: A JSON schema dictionary; may be None or empty.
    :return: A new list of property names. It is always a fresh list, so the
        caller may modify it freely without affecting the schema.
    """
    if not schema:
        return []
    # Copy the list rather than using it directly: appending to the list held
    # by the schema would silently modify the caller's (possibly cached) schema.
    # The "or" also tolerates an explicit null value for the key.
    identifying_properties = list(schema.get("identifyingProperties") or [])
    declared_properties = schema.get("properties") or {}
    # Implicitly add "identifier" to "identifyingProperties", if it exists.
    if "identifier" not in identifying_properties and "identifier" in declared_properties:
        identifying_properties.append("identifier")
    return identifying_properties
load_items(SAMPLE_ITEMS_FOR_REAL_SCHEMAS_FILE, + tab_name='ExperimentSeq', portal_vapp=portal_vapp) assert portal_vapp.call_count == old_count + 1 assert actual_items == expected_items @@ -718,12 +716,13 @@ def test_table_checker(): flattened=True, portal_env=mock_ff_env) checker.check_tabs() - assert str(exc.value) == "There were 2 problems while compiling hints." - assert printed.lines == [ - f"Problem: User[0].project: Unable to validate Project reference: {SAMPLE_PROJECT_UUID!r}", - (f"Problem: User[0].user_institution: Unable to validate Institution reference:" - f" {SAMPLE_INSTITUTION_UUID!r}") + expected_problems = [ + f"User[0].project: Unable to validate Project reference: {SAMPLE_PROJECT_UUID!r}", + f"User[0].user_institution: Unable to validate Institution reference: {SAMPLE_INSTITUTION_UUID!r}" ] + expected_problem_lines = [f"Problem: {problem}" for problem in expected_problems] + assert exc.value.problems == expected_problems + assert printed.lines == expected_problem_lines checker = TableChecker(SAMPLE_WORKBOOK_WITH_MATCHED_UUID_REFS, flattened=True,