Skip to content

Commit

Permalink
Minor changes to utility/troubleshooting/convenience script view-port…
Browse files Browse the repository at this point in the history
…al-object.
  • Loading branch information
dmichaels-harvard committed Aug 19, 2024
1 parent a895507 commit 720e68f
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 13 deletions.
18 changes: 12 additions & 6 deletions dcicutils/scripts/update_portal_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@
# Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront.
# ------------------------------------------------------------------------------------------------------
# Example commands:
# update-portal-object --post file_format.json
# update-portal-object --upsert directory-with-schema-named-dot-json-files
# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads
#
# update-portal-object --load {json-file | directory-with-json-files}
# update-portal-object --post {json-file | directory-with-json-files}
# update-portal-object --upsert {json-file | directory-with-json-files}
# update-portal-object --patch {json-file | directory-with-json-files}
#
# The specified json-file, or each file within directory-with-json-files, must be JSON containing either
# a list of objects, in which case the file name is taken as the target schema name, or if not, then
# the --schema option must be used to specify the target schema; or the JSON must be a dictionary
# of schema names, where the value of each is a list of objects for that schema.
# --------------------------------------------------------------------------------------------------

import argparse
Expand Down Expand Up @@ -464,8 +471,7 @@ def loadxl_print(arg):
# view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root:
# <encoded.root.SMAHTRoot object at 0x136d41460>, vroot: <encoded.root.SMAHTRoot object at 0x136d41460>, vroot_path: ()"}\' # noqa
#
if ((item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and
(len(item_type.groups()) == 1)): # noqa
if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1): # noqa
item_type = to_snake_case(item_type.group(1))
identifying_value = f"/{to_camel_case(item_type)}{identifying_value}"
unresolved_link_error_message_prefix = "Unable to resolve link:"
Expand Down Expand Up @@ -628,7 +634,7 @@ def loadxl_print(arg):
_print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic
f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}")
for item in sorted(loadxl_summary.keys()):
_print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}")
_print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}") # TODO: straightend out this arithmetic
if loadxl_unresolved:
_print("✗ Unresolved references:")
for item in loadxl_unresolved:
Expand Down
39 changes: 33 additions & 6 deletions dcicutils/scripts/view_portal_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,23 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq
_print()
return None

def get_metadata_types(path: str) -> Optional[dict]:
    """
    Return a dictionary mapping each result uuid to its type name for the given query path.

    Does a single portal.get on the given path and, for each item in the "@graph" list of
    the JSON response which has both a "uuid" and an "@type", records its type; if "@type"
    is a list then its first element is used. Returns an empty dictionary if the request
    yields no such results, or None if any exception occurs along the way.
    """
    # N.B. The portal and debug variables belong to the enclosing function; they are only
    # read here (never assigned), so no nonlocal declaration is required for them.
    metadata_types = {}
    try:
        if debug:
            _print("Executing separate query to get actual metadata types for raw/inserts query.")
        response = portal.get(path)
        if not (response and (response.status_code in (200, 307))):
            return metadata_types
        results = (response.json() or {}).get("@graph")
        for result in (results or []):
            result_type = result.get("@type")
            result_uuid = result.get("uuid")
            if isinstance(result_type, list):
                # The first element of the @type list is used as the type of the result.
                result_type = result_type[0] if result_type else None
            # Only record usable (non-empty) type names; anything else is left
            # out so that a lookup for that uuid simply comes up empty.
            if result_uuid and result_type and isinstance(result_type, str):
                metadata_types[result_uuid] = result_type
    except Exception as e:
        # Deliberately best-effort: any failure is signalled by returning None
        # rather than by raising; but do not hide the reason when debugging.
        if debug:
            _print(f"Cannot get metadata types via separate query: {e}")
        return None
    return metadata_types

response = None
try:
if not uuid.startswith("/"):
Expand All @@ -312,17 +329,20 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq
if not response.json:
_exit(f"Invalid JSON getting Portal object: {uuid}")
response = response.json()
response_types = {}
if inserts:
# Format results as suitable for inserts (e.g. via update-portal-object).
response.pop("schema_version", None)
if ((isinstance(results := response.get("@graph"), list) and results) and
(isinstance(results_type := response.get("@type"), list) and results_type) and
(isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and
(results_type := results_type[0:-len("SearchResults")])): # noqa
# For search results, the type (from XyzSearchResults, above) may not be precisely correct for
# each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun);
# For (raw frame) search results, the type (from XyzSearchResults, above) may not be precisely correct
# for each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun);
# so for types which are supertypes (gotten via Portal.get_schemas_super_type_map) we actually
# lookup each result individually to determine its actual precise type.
# lookup each result individually to determine its actual precise type. Although, if we have
# more than (say) 5 results to do this for, then do a separate query (get_metadata_types)
# to get the result types all at once.
if not ((supertypes := portal.get_schemas_super_type_map()) and (subtypes := supertypes.get(results_type))):
subtypes = None
response = {}
Expand All @@ -335,9 +355,16 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq
result.pop("schema_version", None)
result = prune_data(result)
if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and
(result_uuid := result.get("uuid")) and
(individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa
result_type = individual_result_type
(result_uuid := result.get("uuid"))): # noqa
# If we have more than (say) 5 results for which we need to determine the actual result type,
# then get them all at once via a separate query (get_metadata_types), which is not the raw frame.
if (results_total > 5) and (not response_types):
response_types = get_metadata_types(path)
if not (response_types and (result_type := response_types.get(result_uuid))):
if individual_result_type := get_metadata_for_individual_result_type(result_uuid):
result_type = individual_result_type
else:
result_type = results_type
else:
result_type = results_type
if response.get(result_type):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicutils"
version = "8.14.0.1b21" # TODO: To become 8.14.1
version = "8.14.0.1b22" # TODO: To become 8.14.1
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 720e68f

Please sign in to comment.