From 720e68ff3fe8224838d82617ba60ebdafe3aedaa Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:14:00 -0400 Subject: [PATCH] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 18 +++++++---- dcicutils/scripts/view_portal_object.py | 39 +++++++++++++++++++---- pyproject.toml | 2 +- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index fb8557569..8c437fc78 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -2,9 +2,16 @@ # Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront. # ------------------------------------------------------------------------------------------------------ # Example commands: -# update-portal-object --post file_format.json -# update-portal-object --upsert directory-with-schema-named-dot-json-files -# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads +# +# update-portal-object --load {json-file | directory-with-json-files} +# update-portal-object --post {json-file | directory-with-json-files} +# update-portal-object --upsert {json-file | directory-with-json-files} +# update-portal-object --patch {json-file | directory-with-json-files} +# +# The specified json-file or file within directory-with-json-files must be JSON containing either +# a list of objects, in which case the file must be named for the target schema name, or if not, then +# the --schema option must be used to specify the target schema; or the JSON must be a dictionary +# of schema names, where the value of each is a list of objects for that schema. 
# -------------------------------------------------------------------------------------------------- import argparse @@ -464,8 +471,7 @@ def loadxl_print(arg): # view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root: # , vroot: , vroot_path: ()"}\' # noqa # - if ((item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and - (len(item_type.groups()) == 1)): # noqa + if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1): # noqa item_type = to_snake_case(item_type.group(1)) identifying_value = f"/{to_camel_case(item_type)}{identifying_value}" unresolved_link_error_message_prefix = "Unable to resolve link:" @@ -628,7 +634,7 @@ def loadxl_print(arg): _print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): - _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") + _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}") # TODO: straightend out this arithmetic if loadxl_unresolved: _print("✗ Unresolved references:") for item in loadxl_unresolved: diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 1128d1539..4abe0ca01 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -290,6 +290,23 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq _print() return None + def get_metadata_types(path: str) -> Optional[dict]: + nonlocal portal, debug + metadata_types = {} + try: + if debug: + _print(f"Executing separted query to get actual metadata types for raw/inserts query.") + if ((response := portal.get(path)) and (response.status_code in [200, 307]) and + (response := response.json()) and (results := response.get("@graph"))): # noqa + for result in results: + if (result_type 
:= result.get("@type")) and (result_uuid := result.get("uuid")): + if ((isinstance(result_type, list) and (result_type := result_type[0])) or + isinstance(result_type, str)): # noqa + metadata_types[result_uuid] = result_type + except Exception: + return None + return metadata_types + response = None try: if not uuid.startswith("/"): @@ -312,6 +329,7 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq if not response.json: _exit(f"Invalid JSON getting Portal object: {uuid}") response = response.json() + response_types = {} if inserts: # Format results as suitable for inserts (e.g. via update-portal-object). response.pop("schema_version", None) @@ -319,10 +337,12 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq (isinstance(results_type := response.get("@type"), list) and results_type) and (isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and (results_type := results_type[0:-len("SearchResults")])): # noqa - # For search results, the type (from XyzSearchResults, above) may not be precisely correct for - # each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun); + # For (raw frame) search results, the type (from XyzSearchResults, above) may not be precisely correct + # for each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun); # so for types which are supertypes (gotten via Portal.get_schemas_super_type_map) we actually - # lookup each result individually to determine its actual precise type. + # lookup each result individually to determine its actual precise type. Although, if we have + # more than (say) 5 results to do this for, then do a separate query (get_metadata_types) + # to get the result types all at once. 
if not ((supertypes := portal.get_schemas_super_type_map()) and (subtypes := supertypes.get(results_type))): subtypes = None response = {} @@ -335,9 +355,16 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq result.pop("schema_version", None) result = prune_data(result) if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and - (result_uuid := result.get("uuid")) and - (individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa - result_type = individual_result_type + (result_uuid := result.get("uuid"))): # noqa + # If we have more than (say) 5 results for which we need to determine the actual result type, + # then get them all at once via separate query (get_metadata_types), which is not the raw frame. + if (results_total > 5) and (not response_types): + response_types = get_metadata_types(path) + if not (response_types and (result_type := response_types.get(result_uuid))): + if individual_result_type := get_metadata_for_individual_result_type(result_uuid): + result_type = individual_result_type + else: + result_type = results_type else: result_type = results_type if response.get(result_type): diff --git a/pyproject.toml b/pyproject.toml index 9c900b43a..3bade115e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b21" # TODO: To become 8.14.1 +version = "8.14.0.1b22" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"