Minor changes to utility/troubleshooting/convenience script view-port…

…al-object.
4dn-dcic · Aug 19, 2024 · 720e68f · 720e68f
1 parent a895507
commit 720e68f
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 13 deletions.
diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py
@@ -2,9 +2,16 @@
 # Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront.
 # ------------------------------------------------------------------------------------------------------
 # Example commands:
-# update-portal-object --post file_format.json
-# update-portal-object --upsert directory-with-schema-named-dot-json-files
-# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads
+#
+# update-portal-object --load {json-file | directory-with-json-files}
+# update-portal-object --post {json-file | directory-with-json-files}
+# update-portal-object --upsert {json-file | directory-with-json-files}
+# update-portal-object --patch {json-file | directory-with-json-files}
+#
+# The specified json-file, or each file within directory-with-json-files, must be JSON containing either
+# a list of objects, in which case the file must be named for the target schema, or if not, then
+# the --schema option must be used to specify the target schema; or the JSON must be a dictionary
+# of schema names, where the value of each is a list of objects for that schema.
 # --------------------------------------------------------------------------------------------------
 
 import argparse
@@ -464,8 +471,7 @@ def loadxl_print(arg):
                 # view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root:
                 # <encoded.root.SMAHTRoot object at 0x136d41460>, vroot: <encoded.root.SMAHTRoot object at 0x136d41460>, vroot_path: ()"}\' # noqa
                 #
-                if ((item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and
-                    (len(item_type.groups()) == 1)):  # noqa
+                if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1):  # noqa
                     item_type = to_snake_case(item_type.group(1))
                     identifying_value = f"/{to_camel_case(item_type)}{identifying_value}"
                 unresolved_link_error_message_prefix = "Unable to resolve link:"
@@ -628,7 +634,7 @@ def loadxl_print(arg):
         _print(f"Total items loaded: {loadxl_total_item_count // 2}"  # TODO: straightend out this arithmetic
                f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}")
         for item in sorted(loadxl_summary.keys()):
-            _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}")
+            _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}")  # TODO: straightend out this arithmetic
     if loadxl_unresolved:
         _print("✗ Unresolved references:")
         for item in loadxl_unresolved:

diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py
@@ -290,6 +290,23 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]:  # noq
             _print()
         return None
 
+    def get_metadata_types(path: str) -> Optional[dict]:
+        nonlocal portal, debug
+        metadata_types = {}
+        try:
+            if debug:
+                _print(f"Executing separted query to get actual  metadata types for raw/inserts query.")
+            if ((response := portal.get(path)) and (response.status_code in [200, 307]) and
+                (response := response.json()) and (results := response.get("@graph"))):  # noqa
+                for result in results:
+                    if (result_type := result.get("@type")) and (result_uuid := result.get("uuid")):
+                        if ((isinstance(result_type, list) and (result_type := result_type[0])) or
+                            isinstance(result_type, str)):  # noqa
+                            metadata_types[result_uuid] = result_type
+        except Exception:
+            return None
+        return metadata_types
+
     response = None
     try:
         if not uuid.startswith("/"):
@@ -312,17 +329,20 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]:  # noq
     if not response.json:
         _exit(f"Invalid JSON getting Portal object: {uuid}")
     response = response.json()
+    response_types = {}
     if inserts:
         # Format results as suitable for inserts (e.g. via update-portal-object).
         response.pop("schema_version", None)
         if ((isinstance(results := response.get("@graph"), list) and results) and
             (isinstance(results_type := response.get("@type"), list) and results_type) and
             (isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and
             (results_type := results_type[0:-len("SearchResults")])):  # noqa
-            # For search results, the type (from XyzSearchResults, above) may not be precisely correct for
-            # each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun);
+            # For (raw frame) search results, the type (from XyzSearchResults, above) may not be precisely correct
+            # for each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun);
             # so for types which are supertypes (gotten via Portal.get_schemas_super_type_map) we actually
-            # lookup each result individually to determine its actual precise type.
+            # lookup each result individually to determine its actual precise type. Although, if we have
+            # more than (say) 5 results to do this for, then do a separate query (get_metadata_types)
+            # to get the result types all at once.
             if not ((supertypes := portal.get_schemas_super_type_map()) and (subtypes := supertypes.get(results_type))):
                 subtypes = None
             response = {}
@@ -335,9 +355,16 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]:  # noq
                 result.pop("schema_version", None)
                 result = prune_data(result)
                 if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and
-                    (result_uuid := result.get("uuid")) and
-                    (individual_result_type := get_metadata_for_individual_result_type(result_uuid))):  # noqa
-                    result_type = individual_result_type
+                    (result_uuid := result.get("uuid"))):  # noqa
+                    # If we have more than (say) 5 results for which we need to determine the actual result type,
+                    # then get them all at once via a separate query (get_metadata_types), which is not the raw frame.
+                    if (results_total > 5) and (not response_types):
+                        response_types = get_metadata_types(path)
+                    if not (response_types and (result_type := response_types.get(result_uuid))):
+                        if individual_result_type := get_metadata_for_individual_result_type(result_uuid):
+                            result_type = individual_result_type
+                        else:
+                            result_type = results_type
                 else:
                     result_type = results_type
                 if response.get(result_type):

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcicutils"
-version = "8.14.0.1b21"  # TODO: To become 8.14.1
+version = "8.14.0.1b22"  # TODO: To become 8.14.1
 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
 authors = ["4DN-DCIC Team <[email protected]>"]
 license = "MIT"