From 7ba1322dfdb4cc8f04de0c89105d738b3f6df68b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Wed, 14 Aug 2024 13:12:21 -0400 Subject: [PATCH 01/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- CHANGELOG.rst | 5 +++++ dcicutils/scripts/view_portal_object.py | 24 +++++++++++++++++++++--- pyproject.toml | 2 +- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d934d7947..c3d025357 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,11 @@ dcicutils Change Log ---------- +8.14.1 +====== +* Minor changes to utility/troubleshooting/convenience script view-portal-object. + + 8.14.0 ====== * Minor updates to the view-portal-object dev/troubleshooting utility script. diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 90f459303..e0d1dd3ba 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -279,13 +279,31 @@ def _get_portal_object(portal: Portal, uuid: str, if not ((supertypes := portal.get_schemas_super_type_map()) and (subtypes := supertypes.get(results_type))): subtypes = None response = {} + results_index = 0 + results_total = len(results) + def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noqa + # There can be a lot of individual results for which we may need to get the actual type, + # so do this in a function we were can give verbose output feedback. + nonlocal portal, results_index, results_total, verbose + if verbose: + _print(f"Getting actual type for {results_type} result:" + f" {uuid} [{results_index} of {results_total}]", end="") + result = portal.get_metadata(uuid, raise_exception=False) + if (isinstance(result_types := result.get("@type"), list) and + result_types and (result_type := result_types[0])): # noqa + if verbose: + _print(f" -> {result_type}") + return result_type + if verbose: + _print() + return None for result in results: + results_index += 1 result.pop("schema_version", None) if (subtypes and (result_uuid := result.get("uuid")) and - (individual_result := portal.get_metadata(result_uuid, raise_exception=False)) and - isinstance(result_type:= individual_result.get("@type"), list) and result_type and result_type[0]): # noqa - result_type = result_type[0] + (individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa + result_type = individual_result_type else: result_type = results_type if response.get(result_type): diff --git a/pyproject.toml b/pyproject.toml index c47099bb5..5c1783817 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0" +version = "8.14.0.1b1" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 9595acce696a320bf929704d618207961ca98f9f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 07:44:23 -0400 Subject: [PATCH 02/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- CHANGELOG.rst | 2 +- dcicutils/scripts/update_portal_object.py | 33 +++++++++++++++++------ pyproject.toml | 2 +- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c3d025357..3d867dcdb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,7 +8,7 @@ Change Log 8.14.1 ====== -* Minor changes to utility/troubleshooting/convenience script view-portal-object. +* Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. 8.14.0 diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index c5b29255b..b7981af71 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -19,7 +19,7 @@ from dcicutils.command_utils import yes_or_no from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT from dcicutils.ff_utils import delete_metadata, purge_metadata -from dcicutils.misc_utils import get_error_message, PRINT +from dcicutils.misc_utils import get_error_message, ignored, PRINT from dcicutils.portal_utils import Portal as PortalFromUtils @@ -40,7 +40,7 @@ def purge_metadata(self, object_id: str) -> Optional[dict]: _SMAHT_ENV_ENVIRON_NAME = "SMAHT_ENV" # Schema properties to ignore (by default) for the view schema usage. -_SCHEMAS_IGNORE_PROPERTIES = [ +_IGNORE_PROPERTIES_ON_UPDATE = [ "date_created", "last_modified", "principals_allowed", @@ -122,6 +122,8 @@ def main(): parser.add_argument("--upsert", type=str, required=False, default=None, help="Upsert data.") parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.") parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.") + parser.add_argument("--noignore", action="store_true", required=False, default=False, + help="Do not ignore standard fields on insert.") parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.") @@ -156,6 +158,7 @@ def usage(message: Optional[str] = None) -> None: explicit_schema_name=explicit_schema_name, update_function=post_data, update_action_name="POST", + noignore=args.noignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) if args.patch: _post_or_patch_or_upsert(portal=portal, @@ -164,6 +167,7 @@ def usage(message: Optional[str] = None) -> None: update_function=patch_data, update_action_name="PATCH", patch_delete_fields=args.delete, + noignore=args.noignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) args.delete = None if args.upsert: @@ -173,6 +177,7 @@ def usage(message: Optional[str] = None) -> None: update_function=upsert_data, update_action_name="UPSERT", patch_delete_fields=args.delete, + noignore=args.noignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) args.delete = None @@ -196,6 +201,7 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str, explicit_schema_name: str, update_function: Callable, update_action_name: str, patch_delete_fields: Optional[str] = None, + noignore: bool = False, confirm: bool = False, verbose: bool = False, quiet: bool = False, debug: bool = False) -> None: @@ -221,7 +227,7 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str if debug: _print(f"DEBUG: File ({file}) contains an object of type: {schema_name}") update_function(portal, data, schema_name, file=file, - patch_delete_fields=patch_delete_fields, + patch_delete_fields=patch_delete_fields, noignore=noignore, confirm=confirm, verbose=verbose, debug=debug) elif is_schema_name_list(portal, list(data.keys())): if debug: @@ -232,7 +238,7 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"DEBUG: Processing {update_action_name}s for type: {schema_name}") for index, item in enumerate(schema_data): update_function(portal, item, schema_name, file=file, index=index, - patch_delete_fields=patch_delete_fields, + patch_delete_fields=patch_delete_fields, noignore=noignore, confirm=confirm, verbose=verbose, debug=debug) else: _print(f"WARNING: File ({file}) contains schema item which is not a list: {schema_name}") @@ -243,7 +249,7 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}") for index, item in enumerate(data): update_function(portal, item, schema_name, file=file, index=index, - patch_delete_fields=patch_delete_fields, + patch_delete_fields=patch_delete_fields, noignore=noignore, confirm=confirm, verbose=verbose, debug=debug) if debug: _print(f"DEBUG: Processing {update_action_name} file done: {file}") @@ -278,8 +284,9 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str def post_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, # unused here + patch_delete_fields: Optional[str] = None, noignore: bool = False, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: + ignored(patch_delete_fields) if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): _print(f"ERROR: Item for POST has no identifying property: {file} (#{index + 1})") @@ -294,6 +301,7 @@ def post_data(portal: Portal, data: dict, schema_name: str, if verbose: _print(f"POST {schema_name} item: {identifying_path}") try: + data = _prune_data_for_update(data, noignore=noignore) portal.post_metadata(schema_name, data) if debug: _print(f"DEBUG: POST {schema_name} item done: {identifying_path}") @@ -305,7 +313,7 @@ def post_data(portal: Portal, data: dict, schema_name: str, def patch_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, + patch_delete_fields: Optional[str] = None, noignore: bool = False, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): @@ -323,6 +331,7 @@ def patch_data(portal: Portal, data: dict, schema_name: str, try: if delete_fields := _parse_delete_fields(patch_delete_fields): identifying_path += f"?delete_fields={delete_fields}" + data = _prune_data_for_update(data, noignore=noignore) portal.patch_metadata(identifying_path, data) if debug: _print(f"DEBUG: PATCH {schema_name} item OK: {identifying_path}") @@ -334,7 +343,7 @@ def patch_data(portal: Portal, data: dict, schema_name: str, def upsert_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, + patch_delete_fields: Optional[str] = None, noignore: bool = False, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): @@ -349,10 +358,12 @@ def upsert_data(portal: Portal, data: dict, schema_name: str, _print(f"{'PATCH' if exists else 'POST'} {schema_name} item: {identifying_path}") try: if not exists: + data = _prune_data_for_update(data, noignore=noignore) portal.post_metadata(schema_name, data) else: if delete_fields := _parse_delete_fields(patch_delete_fields): identifying_path += f"?delete_fields={delete_fields}" + data = _prune_data_for_update(data, noignore=noignore) portal.patch_metadata(identifying_path, data) if debug: _print(f"DEBUG: UPSERT {schema_name} item OK: {identifying_path}") @@ -362,6 +373,12 @@ def upsert_data(portal: Portal, data: dict, schema_name: str, return +def _prune_data_for_update(data: dict, noignore: bool = False) -> dict: + if noignore is True: + return data + return {key: value for key, value in data.items() if key not in _IGNORE_PROPERTIES_ON_UPDATE} + + def _create_portal(env: Optional[str] = None, app: Optional[str] = None, verbose: bool = False, debug: bool = False) -> Optional[Portal]: diff --git a/pyproject.toml b/pyproject.toml index 5c1783817..76f57f51d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b1" # TODO: To become 8.14.1 +version = "8.14.0.1b2" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 6a4d68ea6746a77c4d402502ca0487ca159d35f3 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 08:48:03 -0400 Subject: [PATCH 03/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/update_portal_object.py | 46 ++++++++++++++--------- dcicutils/scripts/view_portal_object.py | 14 ++++++- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index b7981af71..be807e255 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -123,7 +123,8 @@ def main(): parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.") parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.") parser.add_argument("--noignore", action="store_true", required=False, default=False, - help="Do not ignore standard fields on insert.") + help="Do not ignore standard fields on update(s).") + parser.add_argument("--ignore", nargs="+", help="Ignore these additional fields.") parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.") @@ -158,7 +159,7 @@ def usage(message: Optional[str] = None) -> None: explicit_schema_name=explicit_schema_name, update_function=post_data, update_action_name="POST", - noignore=args.noignore, + noignore=args.noignore, ignore=args.ignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) if args.patch: _post_or_patch_or_upsert(portal=portal, @@ -167,7 +168,7 @@ def usage(message: Optional[str] = None) -> None: update_function=patch_data, update_action_name="PATCH", patch_delete_fields=args.delete, - noignore=args.noignore, + noignore=args.noignore, ignore=args.ignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) args.delete = None if args.upsert: @@ -177,7 +178,7 @@ def usage(message: Optional[str] = None) -> None: update_function=upsert_data, update_action_name="UPSERT", patch_delete_fields=args.delete, - noignore=args.noignore, + noignore=args.noignore, ignore=args.ignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) args.delete = None @@ -201,7 +202,7 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str, explicit_schema_name: str, update_function: Callable, update_action_name: str, patch_delete_fields: Optional[str] = None, - noignore: bool = False, + noignore: bool = False, ignore: Optional[List[str]] = None, confirm: bool = False, verbose: bool = False, quiet: bool = False, debug: bool = False) -> None: @@ -227,7 +228,8 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str if debug: _print(f"DEBUG: File ({file}) contains an object of type: {schema_name}") update_function(portal, data, schema_name, file=file, - patch_delete_fields=patch_delete_fields, noignore=noignore, + patch_delete_fields=patch_delete_fields, + noignore=noignore, ignore=ignore, confirm=confirm, verbose=verbose, debug=debug) elif is_schema_name_list(portal, list(data.keys())): if debug: @@ -238,7 +240,8 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"DEBUG: Processing {update_action_name}s for type: {schema_name}") for index, item in enumerate(schema_data): update_function(portal, item, schema_name, file=file, index=index, - patch_delete_fields=patch_delete_fields, noignore=noignore, + patch_delete_fields=patch_delete_fields, + noignore=noignore, ignore=ignore, confirm=confirm, verbose=verbose, debug=debug) else: _print(f"WARNING: File ({file}) contains schema item which is not a list: {schema_name}") @@ -249,7 +252,8 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}") for index, item in enumerate(data): update_function(portal, item, schema_name, file=file, index=index, - patch_delete_fields=patch_delete_fields, noignore=noignore, + patch_delete_fields=patch_delete_fields, + noignore=noignore, ignore=ignore, confirm=confirm, verbose=verbose, debug=debug) if debug: _print(f"DEBUG: Processing {update_action_name} file done: {file}") @@ -284,7 +288,8 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str def post_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, noignore: bool = False, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: ignored(patch_delete_fields) if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): @@ -301,7 +306,7 @@ def post_data(portal: Portal, data: dict, schema_name: str, if verbose: _print(f"POST {schema_name} item: {identifying_path}") try: - data = _prune_data_for_update(data, noignore=noignore) + data = _prune_data_for_update(data, noignore=noignore, ignore=ignore) portal.post_metadata(schema_name, data) if debug: _print(f"DEBUG: POST {schema_name} item done: {identifying_path}") @@ -313,7 +318,8 @@ def post_data(portal: Portal, data: dict, schema_name: str, def patch_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, noignore: bool = False, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): @@ -331,7 +337,7 @@ def patch_data(portal: Portal, data: dict, schema_name: str, try: if delete_fields := _parse_delete_fields(patch_delete_fields): identifying_path += f"?delete_fields={delete_fields}" - data = _prune_data_for_update(data, noignore=noignore) + data = _prune_data_for_update(data, noignore=noignore, ignore=ignore) portal.patch_metadata(identifying_path, data) if debug: _print(f"DEBUG: PATCH {schema_name} item OK: {identifying_path}") @@ -343,7 +349,8 @@ def patch_data(portal: Portal, data: dict, schema_name: str, def upsert_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, noignore: bool = False, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): @@ -358,12 +365,12 @@ def upsert_data(portal: Portal, data: dict, schema_name: str, _print(f"{'PATCH' if exists else 'POST'} {schema_name} item: {identifying_path}") try: if not exists: - data = _prune_data_for_update(data, noignore=noignore) + data = _prune_data_for_update(data, noignore=noignore, ignore=ignore) portal.post_metadata(schema_name, data) else: if delete_fields := _parse_delete_fields(patch_delete_fields): identifying_path += f"?delete_fields={delete_fields}" - data = _prune_data_for_update(data, noignore=noignore) + data = _prune_data_for_update(data, noignore=noignore, ignore=ignore) portal.patch_metadata(identifying_path, data) if debug: _print(f"DEBUG: UPSERT {schema_name} item OK: {identifying_path}") @@ -373,10 +380,13 @@ def upsert_data(portal: Portal, data: dict, schema_name: str, return -def _prune_data_for_update(data: dict, noignore: bool = False) -> dict: - if noignore is True: +def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict: + ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE + if isinstance(ignore, list): + ignore_these_properties = ignore_these_properties + ignore + if not ignore_these_properties: return data - return {key: value for key, value in data.items() if key not in _IGNORE_PROPERTIES_ON_UPDATE} + return {key: value for key, value in data.items() if key not in ignore_these_properties} def _create_portal(env: Optional[str] = None, app: Optional[str] = None, diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index e0d1dd3ba..c0a0da33c 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -104,6 +104,7 @@ def main(): parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") parser.add_argument("--inserts", action="store_true", required=False, default=False, help="Format output for subsequent inserts.") + parser.add_argument("--ignore", nargs="+", help="Ignore these fields for --inserts.") parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.") parser.add_argument("--database", action="store_true", required=False, default=False, help="Read from database output.") @@ -191,7 +192,7 @@ def main(): return data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, inserts=args.inserts, - database=args.database, check=args.bool, verbose=args.verbose) + ignore=args.ignore, database=args.database, check=args.bool, verbose=args.verbose) if args.bool: if data: _print(f"{args.uuid}: found") @@ -242,7 +243,15 @@ def _create_portal(ini: str, env: Optional[str] = None, def _get_portal_object(portal: Portal, uuid: str, raw: bool = False, inserts: bool = False, database: bool = False, + ignore: Optional[List[str]] = None, check: bool = False, verbose: bool = False) -> dict: + + def prune_data(data: dict) -> dict: + nonlocal ignore + if not isinstance(ignore, list) or not ignore: + return data + return {key: value for key, value in data.items() if key not in ignore} + response = None try: if not uuid.startswith("/"): @@ -300,6 +309,7 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq for result in results: results_index += 1 result.pop("schema_version", None) + result = prune_data(result) if (subtypes and (result_uuid := result.get("uuid")) and (individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa @@ -313,7 +323,7 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq # Get the result as non-raw so we can get its type. elif ((response_cooked := portal.get(path, database=database)) and (isinstance(response_type := response_cooked.json().get("@type"), list) and response_type)): - response = {f"{response_type[0]}": [response]} + response = {f"{response_type[0]}": [prune_data(response)]} elif raw: response.pop("schema_version", None) return response From a19a37216897eb7ac29953416f9e9cebb2dd0538 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 08:48:10 -0400 Subject: [PATCH 04/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 76f57f51d..1135025c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b2" # TODO: To become 8.14.1 +version = "8.14.0.1b3" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 83c1f4bbecc7a75bcaecf07f3e56db3b42326485 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 09:51:55 -0400 Subject: [PATCH 05/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/update_portal_object.py | 9 +++++++++ pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index be807e255..1a73db046 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -236,6 +236,7 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"DEBUG: File ({file}) contains a dictionary of schema names.") for schema_name in data: if isinstance(schema_data := data[schema_name], list): + schema_data = _impose_special_ordering(schema_data, schema_name) if debug: _print(f"DEBUG: Processing {update_action_name}s for type: {schema_name}") for index, item in enumerate(schema_data): @@ -250,6 +251,8 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str elif isinstance(data, list): if debug: _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}") + import pdb ; pdb.set_trace() # noqa + data = _impose_special_ordering(data, schema_name) for index, item in enumerate(data): update_function(portal, item, schema_name, file=file, index=index, patch_delete_fields=patch_delete_fields, @@ -286,6 +289,12 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str _print(f"ERROR: Cannot find file or directory: {file_or_directory}") +def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]: + if schema_name == "FileFormat": + return sorted(data, key=lambda item: "extra_file_formats" in item) + return data + + def post_data(portal: Portal, data: dict, schema_name: str, file: Optional[str] = None, index: int = 0, patch_delete_fields: Optional[str] = None, diff --git a/pyproject.toml b/pyproject.toml index 1135025c1..363d79c57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b3" # TODO: To become 8.14.1 +version = "8.14.0.1b4" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a842e23716263e0ba5e97ab7ab2726ed84a6e92a Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 11:12:05 -0400 Subject: [PATCH 06/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/view_portal_object.py | 51 ++++++++++++++++++++++--- pyproject.toml | 2 +- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index c0a0da33c..4c2ea9b01 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -66,7 +66,7 @@ import yaml from dcicutils.captured_output import captured_output, uncaptured_output from dcicutils.command_utils import yes_or_no -from dcicutils.misc_utils import get_error_message, is_uuid, PRINT +from dcicutils.misc_utils import get_error_message, is_uuid, PRINT, to_snake_case from dcicutils.portal_utils import Portal @@ -104,6 +104,8 @@ def main(): parser.add_argument("--raw", action="store_true", required=False, default=False, help="Raw output.") parser.add_argument("--inserts", action="store_true", required=False, default=False, help="Format output for subsequent inserts.") + parser.add_argument("--insert-files", action="store_true", required=False, default=False, + help="Output for to insert files.") parser.add_argument("--ignore", nargs="+", help="Ignore these fields for --inserts.") parser.add_argument("--tree", action="store_true", required=False, default=False, help="Tree output for schemas.") parser.add_argument("--database", action="store_true", required=False, default=False, @@ -117,6 +119,7 @@ def main(): parser.add_argument("--indent", required=False, default=False, help="Indent output.", type=int) parser.add_argument("--summary", action="store_true", required=False, default=False, help="Summary output (for schema only).") + parser.add_argument("--force", action="store_true", required=False, default=False, help="Debugging output.") parser.add_argument("--terse", action="store_true", required=False, default=False, help="Terse output.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") @@ -129,6 +132,15 @@ def main(): _print("UUID or schema or path required.") _exit(1) + if args.insert_files: + args.inserts = True + if args.output: + if not os.path.isdir(args.output): + _print(f"Specified output directory for insert files does not exist: {args.output}") + exit(1) + args.insert_files = args.output + args.output = None + if args.output: if os.path.exists(args.output): if os.path.isdir(args.output): @@ -136,7 +148,7 @@ def main(): _exit(1) elif os.path.isfile(args.output): _print(f"Specified output file already exists: {args.output}") - if not yes_or_no(f"Do you want to overwrite this file?"): + if (not args.force) and not yes_or_no(f"Do you want to overwrite this file?"): _exit(0) _output_file = io.open(args.output, "w") @@ -191,8 +203,12 @@ def main(): all=args.all, summary=args.summary, yaml=args.yaml) return - data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, inserts=args.inserts, - ignore=args.ignore, database=args.database, check=args.bool, verbose=args.verbose) + data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, + inserts=args.inserts, insert_files=args.insert_files, + ignore=args.ignore, check=args.bool, force=args.force, verbose=args.verbose) + if args.insert_files: + return + if args.bool: if data: _print(f"{args.uuid}: found") @@ -242,9 +258,10 @@ def _create_portal(ini: str, env: Optional[str] = None, def _get_portal_object(portal: Portal, uuid: str, - raw: bool = False, inserts: bool = False, database: bool = False, + raw: bool = False, database: bool = False, + inserts: bool = False, insert_files: bool = False, ignore: Optional[List[str]] = None, - check: bool = False, verbose: bool = False) -> dict: + check: bool = False, force: bool = False, verbose: bool = False) -> dict: def prune_data(data: dict) -> dict: nonlocal ignore @@ -324,6 +341,28 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq elif ((response_cooked := portal.get(path, database=database)) and (isinstance(response_type := response_cooked.json().get("@type"), list) and response_type)): response = {f"{response_type[0]}": [prune_data(response)]} + if insert_files: + output_directory = insert_files if isinstance(insert_files, str) else os.getcwd() + for schema_name in response: + schema_data = response[schema_name] + file_name = f"{to_snake_case(schema_name)}.json" + file_path = os.path.join(output_directory, file_name) + if os.path.exists(file_path): + if os.path.isdir(file_path): + _print(f"WARNING: Output file already exists as a directory. SKIPPING: {file_path}") + continue + if force: + if verbose: + _print(f"Overwriting extant file (per --force option): {file_path}") + else: + _print(f"Output file already exists: {file_path}") + if (not force) and not yes_or_no(f"Overwrite this file?"): + continue + if verbose: + _print(f"Writing {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" + f" {len(schema_data)}) file: {file_path}") + with io.open(file_path, "w") as f: + json.dump(schema_data, f, indent=4) elif raw: response.pop("schema_version", None) return response diff --git a/pyproject.toml b/pyproject.toml index 363d79c57..2c50a4678 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b4" # TODO: To become 8.14.1 +version = "8.14.0.1b5" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From ff794f1856e419b95200c55a04c8cd46945201b3 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 11:21:57 -0400 Subject: [PATCH 07/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/update_portal_object.py | 1 - dcicutils/scripts/view_portal_object.py | 13 ++++++------- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 1a73db046..53ea2ecad 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -251,7 +251,6 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str elif isinstance(data, list): if debug: _print(f"DEBUG: File ({file}) contains a list of objects of type: {schema_name}") - import pdb ; pdb.set_trace() # noqa data = _impose_special_ordering(data, schema_name) for index, item in enumerate(data): update_function(portal, item, schema_name, file=file, index=index, diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 4c2ea9b01..cbfbb2aa8 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -347,19 +347,18 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq schema_data = response[schema_name] file_name = f"{to_snake_case(schema_name)}.json" file_path = os.path.join(output_directory, file_name) + message_verb = "Writing" if os.path.exists(file_path): + message_verb = "Overwriting" if os.path.isdir(file_path): _print(f"WARNING: Output file already exists as a directory. SKIPPING: {file_path}") continue - if force: - if verbose: - _print(f"Overwriting extant file (per --force option): {file_path}") - else: + if not force: _print(f"Output file already exists: {file_path}") - if (not force) and not yes_or_no(f"Overwrite this file?"): - continue + if not yes_or_no(f"Overwrite this file?"): + continue if verbose: - _print(f"Writing {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" + _print(f"{message_verb} {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" f" {len(schema_data)}) file: {file_path}") with io.open(file_path, "w") as f: json.dump(schema_data, f, indent=4) diff --git a/pyproject.toml b/pyproject.toml index 2c50a4678..e6763c7b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b5" # TODO: To become 8.14.1 +version = "8.14.0.1b6" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From f7f556ad2241dd983e6cb8029c609617e2d80926 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 12:41:29 -0400 Subject: [PATCH 08/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/view_portal_object.py | 76 ++++++++++++++++++------- pyproject.toml | 2 +- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index cbfbb2aa8..15e4c92df 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -122,11 +122,13 @@ def main(): parser.add_argument("--force", action="store_true", required=False, default=False, help="Debugging output.") parser.add_argument("--terse", action="store_true", required=False, default=False, help="Terse output.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") + parser.add_argument("--noheader", action="store_true", required=False, default=False, help="Supress header output.") parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") args = parser.parse_args() portal = _create_portal(ini=args.ini, env=args.env or os.environ.get("SMAHT_ENV"), - server=args.server, app=args.app, verbose=args.verbose, debug=args.debug) + server=args.server, app=args.app, + verbose=args.verbose and not args.noheader, debug=args.debug) if not args.uuid: _print("UUID or schema or path required.") @@ -205,7 +207,8 @@ def main(): data = _get_portal_object(portal=portal, uuid=args.uuid, raw=args.raw, database=args.database, inserts=args.inserts, insert_files=args.insert_files, - ignore=args.ignore, check=args.bool, force=args.force, verbose=args.verbose) + ignore=args.ignore, check=args.bool, + force=args.force, verbose=args.verbose, debug=args.debug) if args.insert_files: return @@ -261,7 +264,8 @@ def _get_portal_object(portal: Portal, uuid: str, raw: bool = False, database: bool = False, inserts: bool = False, insert_files: bool = False, ignore: Optional[List[str]] = None, - check: bool = False, force: bool = False, verbose: bool = False) -> dict: + check: bool = False, force: bool = False, + verbose: bool = False, debug: bool = False) -> dict: def prune_data(data: dict) -> dict: nonlocal ignore @@ -269,6 +273,23 @@ def prune_data(data: dict) -> dict: return data return {key: value for key, value in data.items() if key not in ignore} + def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noqa + # There can be a lot of individual results for which we may need to get the actual type, + # so do this in a function we were can give verbose output feedback. + nonlocal portal, results_index, results_total, verbose + if verbose: + _print(f"Getting actual type for {results_type} result:" + f" {uuid} [{results_index} of {results_total}]", end="") + result = portal.get_metadata(uuid, raise_exception=False) + if (isinstance(result_types := result.get("@type"), list) and + result_types and (result_type := result_types[0])): # noqa + if verbose: + _print(f" -> {result_type}") + return result_type + if verbose: + _print() + return None + response = None try: if not uuid.startswith("/"): @@ -307,27 +328,11 @@ def prune_data(data: dict) -> dict: response = {} results_index = 0 results_total = len(results) - def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noqa - # There can be a lot of individual results for which we may need to get the actual type, - # so do this in a function we were can give verbose output feedback. - nonlocal portal, results_index, results_total, verbose - if verbose: - _print(f"Getting actual type for {results_type} result:" - f" {uuid} [{results_index} of {results_total}]", end="") - result = portal.get_metadata(uuid, raise_exception=False) - if (isinstance(result_types := result.get("@type"), list) and - result_types and (result_type := result_types[0])): # noqa - if verbose: - _print(f" -> {result_type}") - return result_type - if verbose: - _print() - return None for result in results: results_index += 1 result.pop("schema_version", None) result = prune_data(result) - if (subtypes and + if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and (result_uuid := result.get("uuid")) and (individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa result_type = individual_result_type @@ -367,6 +372,37 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq return response +def one_or_more_objects_of_types_exists(portal: Portal, schema_types: List[str], debug: bool = False) -> bool: + for schema_type in schema_types: + try: + if one_or_more_objects_of_type_exists(portal, schema_type, debug=debug): + return True + response = portal.get(f"/{schema_type}") + if response and response.status_code == 404: + _print(f"There are no objects of sub-type: {schema_type}") + return False + except Exception: + return True + return False + + +@lru_cache(maxsize=64) +def one_or_more_objects_of_type_exists(portal: Portal, schema_type: str, debug: bool = False) -> bool: + try: + if debug: + _print(f"Checking if there are actually any objects of type: {schema_type}") + if portal.get(f"/{schema_type}").status_code == 404: + if debug: + _print(f"No any objects of type exist: {schema_type}") + else: + if debug: + _print(f"One or more objects of type exist: {schema_type}") + except Exception as e: + _print(f"ERROR: Checking if there are actually any objects of type: {schema_type}") + _print(e) + return False + + @lru_cache(maxsize=1) def _get_schemas(portal: Portal) -> Optional[dict]: return portal.get_schemas() diff --git a/pyproject.toml b/pyproject.toml index e6763c7b9..82fed8836 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b6" # TODO: To become 8.14.1 +version = "8.14.0.1b7" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 59ffd9a0d5edce5a2f00a8d4b93dd4443d550f3d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Fri, 16 Aug 2024 14:54:53 -0400 Subject: [PATCH 09/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 41 +++++++++++++++++++++-- pyproject.toml | 2 +- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 53ea2ecad..7f84a67d4 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -38,6 +38,7 @@ def purge_metadata(self, object_id: str) -> Optional[dict]: _DEFAULT_APP = "smaht" _SMAHT_ENV_ENVIRON_NAME = "SMAHT_ENV" +_DEFAULT_INI_FILE_FOR_LOAD = "development.ini" # Schema properties to ignore (by default) for the view schema usage. _IGNORE_PROPERTIES_ON_UPDATE = [ @@ -120,6 +121,8 @@ def main(): parser.add_argument("--post", type=str, required=False, default=None, help="POST data.") parser.add_argument("--patch", type=str, required=False, default=None, help="PATCH data.") parser.add_argument("--upsert", type=str, required=False, default=None, help="Upsert data.") + parser.add_argument("--load", type=str, required=False, default=None, help="Load data via snovault.loadxl.") + parser.add_argument("--ini", type=str, required=False, default=None, help="INI file for data via snovault.loadxl.") parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.") parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.") parser.add_argument("--noignore", action="store_true", required=False, default=False, @@ -143,6 +146,41 @@ def usage(message: Optional[str] = None) -> None: else: app = APP_SMAHT + if not (args.post or args.patch or args.upsert or args.delete or args.purge or args.load): + usage() + + if args.load: + if args.post or args.patch or args.upsert or args.delete or args.purge: + _print("Cannot use any other update option" + "when using the --load option (to load data via snovault.loadxl).") + exit(1) + if args.env: + _print("The --env is not used for the --load option (to load data via snovault.loadxl).") + if args.schema: + _print("The --schema is not used for the --load option (to load data via snovault.loadxl).") + from snovault.loadxl import load_data + from dcicutils.captured_output import captured_output + if args.ini: + ini_file = args.ini + else: + ini_file = _DEFAULT_INI_FILE_FOR_LOAD + if not os.path.exists(ini_file): + _print(f"The INI file required for --load is not found: {ini_file}") + exit(1) + if not os.path.isdir(args.load): + _print(f"Load directory does not exist: {args.load}") + exit(1) + portal = None + with captured_output(not args.debug): + portal = Portal(ini_file) + if args.verbose: + _print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}") + _print(f"Portal INI file for load is: {ini_file}") + load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False) + if args.verbose: + _print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}") + exit(0) + portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug) if explicit_schema_name := args.schema: @@ -150,9 +188,6 @@ def usage(message: Optional[str] = None) -> None: if not schema: usage(f"ERROR: Unknown schema name: {args.schema}") - if not (args.post or args.patch or args.upsert or args.delete or args.purge): - usage() - if args.post: _post_or_patch_or_upsert(portal=portal, file_or_directory=args.post, diff --git a/pyproject.toml b/pyproject.toml index 82fed8836..2f24410ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b7" # TODO: To become 8.14.1 +version = "8.14.0.1b8" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 4f950d9613557d45d0eae08e63f8fd64ff854450 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 17 Aug 2024 08:43:44 -0400 Subject: [PATCH 10/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/view_portal_object.py | 18 +++++++----------- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 15e4c92df..1128d1539 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -330,6 +330,8 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq results_total = len(results) for result in results: results_index += 1 + if debug: + print(f"Processing result: {results_index}") result.pop("schema_version", None) result = prune_data(result) if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and @@ -374,14 +376,7 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq def one_or_more_objects_of_types_exists(portal: Portal, schema_types: List[str], debug: bool = False) -> bool: for schema_type in schema_types: - try: - if one_or_more_objects_of_type_exists(portal, schema_type, debug=debug): - return True - response = portal.get(f"/{schema_type}") - if response and response.status_code == 404: - _print(f"There are no objects of sub-type: {schema_type}") - return False - except Exception: + if one_or_more_objects_of_type_exists(portal, schema_type, debug=debug): return True return False @@ -393,14 +388,15 @@ def one_or_more_objects_of_type_exists(portal: Portal, schema_type: str, debug: _print(f"Checking if there are actually any objects of type: {schema_type}") if portal.get(f"/{schema_type}").status_code == 404: if debug: - _print(f"No any objects of type exist: {schema_type}") + _print(f"No objects of type actually exist: {schema_type}") + return False else: if debug: _print(f"One or more objects of type exist: {schema_type}") except Exception as e: - _print(f"ERROR: Checking if there are actually any objects of type: {schema_type}") + _print(f"ERROR: Cannot determine if there are actually any objects of type: {schema_type}") _print(e) - return False + return True @lru_cache(maxsize=1) diff --git a/pyproject.toml b/pyproject.toml index 2f24410ec..0f6e6cf13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b8" # TODO: To become 8.14.1 +version = "8.14.0.1b9" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 5e5a23f1238ae1443ce69bf72c55b5f93ea9c50c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 17 Aug 2024 14:13:40 -0400 Subject: [PATCH 11/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 187 +++++++++++++++++----- pyproject.toml | 2 +- 2 files changed, 148 insertions(+), 41 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 7f84a67d4..36496817a 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -14,13 +14,15 @@ import json import os import re +import shutil import sys from typing import Callable, List, Optional, Tuple, Union from dcicutils.command_utils import yes_or_no from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT from dcicutils.ff_utils import delete_metadata, purge_metadata -from dcicutils.misc_utils import get_error_message, ignored, PRINT +from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case from dcicutils.portal_utils import Portal as PortalFromUtils +from dcicutils.tmpfile_utils import temporary_directory class Portal(PortalFromUtils): @@ -131,6 +133,8 @@ def main(): parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.") + parser.add_argument("--noprogress", action="store_true", required=False, default=False, + help="No progress bar output for --load.") parser.add_argument("--debug", action="store_true", required=False, default=False, help="Debugging output.") args = parser.parse_args() @@ -158,27 +162,8 @@ def usage(message: Optional[str] = None) -> None: _print("The --env is not used for the --load option (to load data via snovault.loadxl).") if args.schema: _print("The --schema is not used for the --load option (to load data via snovault.loadxl).") - from snovault.loadxl import load_data - from dcicutils.captured_output import captured_output - if args.ini: - ini_file = args.ini - else: - ini_file = _DEFAULT_INI_FILE_FOR_LOAD - if not os.path.exists(ini_file): - _print(f"The INI file required for --load is not found: {ini_file}") - exit(1) - if not os.path.isdir(args.load): - _print(f"Load directory does not exist: {args.load}") - exit(1) - portal = None - with captured_output(not args.debug): - portal = Portal(ini_file) - if args.verbose: - _print(f"Loading data files into Portal (via snovault.loadxl) from: {args.load}") - _print(f"Portal INI file for load is: {ini_file}") - load_data(portal.vapp, indir=args.load, overwrite=True, use_master_inserts=False) - if args.verbose: - _print(f"Done loading data into Portal (via snovault.loadxl) files from: {args.load}") + _load_data(inserts_directory=args.load, ini_file=args.ini, + verbose=args.verbose, debug=args.debug, noprogress=args.noprogress) exit(0) portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug) @@ -192,7 +177,7 @@ def usage(message: Optional[str] = None) -> None: _post_or_patch_or_upsert(portal=portal, file_or_directory=args.post, explicit_schema_name=explicit_schema_name, - update_function=post_data, + update_function=_post_data, update_action_name="POST", noignore=args.noignore, ignore=args.ignore, confirm=args.confirm, verbose=args.verbose, quiet=args.quiet, debug=args.debug) @@ -200,7 +185,7 @@ def usage(message: Optional[str] = None) -> None: _post_or_patch_or_upsert(portal=portal, file_or_directory=args.patch, explicit_schema_name=explicit_schema_name, - update_function=patch_data, + update_function=_patch_data, update_action_name="PATCH", patch_delete_fields=args.delete, noignore=args.noignore, ignore=args.ignore, @@ -210,7 +195,7 @@ def usage(message: Optional[str] = None) -> None: _post_or_patch_or_upsert(portal=portal, file_or_directory=args.upsert, explicit_schema_name=explicit_schema_name, - update_function=upsert_data, + update_function=_upsert_data, update_action_name="UPSERT", patch_delete_fields=args.delete, noignore=args.noignore, ignore=args.ignore, @@ -329,11 +314,11 @@ def _impose_special_ordering(data: List[dict], schema_name: str) -> List[dict]: return data -def post_data(portal: Portal, data: dict, schema_name: str, - file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, - noignore: bool = False, ignore: Optional[List[str]] = None, - confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: +def _post_data(portal: Portal, data: dict, schema_name: str, + file: Optional[str] = None, index: int = 0, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, + confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: ignored(patch_delete_fields) if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): @@ -359,11 +344,11 @@ def post_data(portal: Portal, data: dict, schema_name: str, return -def patch_data(portal: Portal, data: dict, schema_name: str, - file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, - noignore: bool = False, ignore: Optional[List[str]] = None, - confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: +def _patch_data(portal: Portal, data: dict, schema_name: str, + file: Optional[str] = None, index: int = 0, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, + confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): _print(f"ERROR: Item for PATCH has no identifying property: {file} (#{index + 1})") @@ -390,11 +375,11 @@ def patch_data(portal: Portal, data: dict, schema_name: str, return -def upsert_data(portal: Portal, data: dict, schema_name: str, - file: Optional[str] = None, index: int = 0, - patch_delete_fields: Optional[str] = None, - noignore: bool = False, ignore: Optional[List[str]] = None, - confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: +def _upsert_data(portal: Portal, data: dict, schema_name: str, + file: Optional[str] = None, index: int = 0, + patch_delete_fields: Optional[str] = None, + noignore: bool = False, ignore: Optional[List[str]] = None, + confirm: bool = False, verbose: bool = False, debug: bool = False) -> None: if not (identifying_path := portal.get_identifying_path(data, portal_type=schema_name)): if isinstance(file, str) and isinstance(index, int): _print(f"ERROR: Item for UPSERT has no identifying property: {file} (#{index + 1})") @@ -423,6 +408,126 @@ def upsert_data(portal: Portal, data: dict, schema_name: str, return +def _load_data(inserts_directory: str, ini_file: str, + verbose: bool = False, debug: bool = False, noprogress: bool = False) -> None: + + from snovault.loadxl import load_all_gen, LoadGenWrapper + from dcicutils.captured_output import captured_output + from dcicutils.progress_bar import ProgressBar + + def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict): + + nonlocal LoadGenWrapper, load_all_gen, verbose, debug + progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes + progress_bar = ProgressBar(progress_total) if not noprogress else None + + def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str: + if not isinstance(encoding, str): + encoding = "utf-8" + if isinstance(str_or_bytes, bytes): + return str_or_bytes.decode(encoding).strip() + elif isinstance(str_or_bytes, str): + return str_or_bytes.strip() + return "" + + LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$") + LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Skip", + "CHECK": "Validate", "ERROR": "Error"} + current_item_type = None + current_item_count = 0 + current_item_total = 0 + total_item_count = 0 + for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory, + docsdir=None, overwrite=True, verbose=True)): + total_item_count += 1 + item = decode_bytes(item) + match = LOADXL_RESPONSE_PATTERN.match(item) + if not match or match.re.groups < 3: + continue + action = LOADXL_ACTION_NAME[match.group(1).upper()] + # response_value = match.group(0) + # identifying_value = match.group(2) + item_type = match.group(3) + if current_item_type != item_type: + if noprogress and debug and current_item_type is not None: + print() + current_item_type = item_type + current_item_count = 0 + current_item_total = schema_names_to_load[item_type] + if progress_bar: + progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}") + current_item_count += 1 + if progress_bar: + progress_bar.set_progress(total_item_count) + elif debug: + print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})") + if progress_bar: + progress_bar.set_description("▶ Load Complete") + print() + + if not ini_file: + ini_file = _DEFAULT_INI_FILE_FOR_LOAD + if not os.path.isabs(ini_file := os.path.expanduser(ini_file)): + ini_file = os.path.join(os.getcwd(), ini_file) + if not os.path.exists(ini_file): + _print(f"The INI file required for --load is not found: {ini_file}") + exit(1) + if not os.path.isabs(inserts_directory := os.path.expanduser(inserts_directory)): + inserts_directory = os.path.join(os.getcwd(), inserts_directory) + if not os.path.isdir(inserts_directory := os.path.expanduser(inserts_directory)): + _print(f"Load directory does not exist: {inserts_directory}") + exit(1) + portal = None + with captured_output(not debug): + portal = Portal(ini_file) + if verbose: + _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}") + _print(f"Portal INI file for load is: {ini_file}") + + schema_names = list(_get_schemas(portal).keys()) + schema_snake_case_names = [to_snake_case(item) for item in schema_names] + schema_names_to_load = {} + + copy_to_temporary_directory = False + for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")): + json_file_name = os.path.basename(json_file_path) + schema_name = os.path.basename(json_file_name)[:-len(".json")] + if (schema_name not in schema_snake_case_names) and (schema_name not in schema_names): + _print(f"File is not named for a known schema: {json_file_name} ▶ ignoring") + copy_to_temporary_directory = True + else: + try: + with io.open(json_file_path, "r") as f: + if not isinstance(data := json.load(f), list): + _print("Data JSON file does not contain an array: {json_file_path} ▶ ignoring") + copy_to_temporary_directory = True + elif (nobjects := len(data)) < 1: + _print("Data JSON file contains no items: {json_file_path} ▶ ignoring") + copy_to_temporary_directory = True + else: + schema_names_to_load[schema_name] = nobjects + except Exception: + _print("Cannot load JSON data from file: {json_file_path} ▶ ignoring") + copy_to_temporary_directory = True + if not schema_names_to_load: + _print("Directory contains no valid data: {inserts_directory}") + return + if copy_to_temporary_directory: + with temporary_directory() as tmpdir: + if debug: + _print(f"Using temporary directory: {tmpdir}") + for json_file_path in glob.glob(os.path.join(inserts_directory, "*.json")): + json_file_name = os.path.basename(json_file_path) + schema_name = os.path.basename(json_file_name)[:-len(".json")] + if (schema_name in schema_snake_case_names) or (schema_name in schema_names): + shutil.copy(json_file_path, tmpdir) + loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load) + else: + loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load) + if verbose: + _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}") + + def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict: ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE if isinstance(ignore, list): @@ -509,6 +614,8 @@ def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str @lru_cache(maxsize=1) def _get_schemas(portal: Portal) -> Optional[dict]: + if portal.vapp: + return portal.vapp.get("/profiles/?frame=raw").json return portal.get_schemas() diff --git a/pyproject.toml b/pyproject.toml index 0f6e6cf13..cfb52d4e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b9" # TODO: To become 8.14.1 +version = "8.14.0.1b10" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From bcf974aac521c1a7ba53ec8b1ace621e88116267 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 17 Aug 2024 23:33:54 -0400 Subject: [PATCH 12/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 107 ++++++++++++++++------ pyproject.toml | 2 +- 2 files changed, 82 insertions(+), 27 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 36496817a..e3ccd7479 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -162,7 +162,7 @@ def usage(message: Optional[str] = None) -> None: _print("The --env is not used for the --load option (to load data via snovault.loadxl).") if args.schema: _print("The --schema is not used for the --load option (to load data via snovault.loadxl).") - _load_data(inserts_directory=args.load, ini_file=args.ini, + _load_data(load=args.load, ini_file=args.ini, verbose=args.verbose, debug=args.debug, noprogress=args.noprogress) exit(0) @@ -226,14 +226,6 @@ def _post_or_patch_or_upsert(portal: Portal, file_or_directory: str, confirm: bool = False, verbose: bool = False, quiet: bool = False, debug: bool = False) -> None: - def is_schema_name_list(portal: Portal, keys: list) -> bool: - if isinstance(keys, list): - for key in keys: - if portal.get_schema(key) is None: - return False - return True - return False - def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str], patch_delete_fields: Optional[str] = None, confirm: bool = False, verbose: bool = False, @@ -251,7 +243,7 @@ def post_or_patch_or_upsert(portal: Portal, file: str, schema_name: Optional[str patch_delete_fields=patch_delete_fields, noignore=noignore, ignore=ignore, confirm=confirm, verbose=verbose, debug=debug) - elif is_schema_name_list(portal, list(data.keys())): + elif _is_schema_name_list(portal, list(data.keys())): if debug: _print(f"DEBUG: File ({file}) contains a dictionary of schema names.") for schema_name in data: @@ -408,8 +400,8 @@ def _upsert_data(portal: Portal, data: dict, schema_name: str, return -def _load_data(inserts_directory: str, ini_file: str, - verbose: bool = False, debug: bool = False, noprogress: bool = False) -> None: +def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None, + verbose: bool = False, debug: bool = False, noprogress: bool = False) -> bool: from snovault.loadxl import load_all_gen, LoadGenWrapper from dcicutils.captured_output import captured_output @@ -431,7 +423,7 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> return "" LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$") - LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Skip", + LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Check", "CHECK": "Validate", "ERROR": "Error"} current_item_type = None current_item_count = 0 @@ -472,14 +464,66 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> if not os.path.exists(ini_file): _print(f"The INI file required for --load is not found: {ini_file}") exit(1) - if not os.path.isabs(inserts_directory := os.path.expanduser(inserts_directory)): - inserts_directory = os.path.join(os.getcwd(), inserts_directory) - if not os.path.isdir(inserts_directory := os.path.expanduser(inserts_directory)): - _print(f"Load directory does not exist: {inserts_directory}") - exit(1) + + if not os.path.isabs(load := os.path.expanduser(load)): + load = os.path.join(os.getcwd(), load) + if not os.path.exists(load): + return False + + if os.path.isdir(load): + inserts_directory = load + inserts_file = None + else: + inserts_directory = None + inserts_file = load + portal = None with captured_output(not debug): portal = Portal(ini_file) + + if inserts_file: + with io.open(inserts_file, "r") as f: + try: + data = json.load(f) + except Exception as e: + _print(f"Cannot load JSON data from file: {inserts_file}") + return False + if isinstance(data, list): + if not (schema_name := explicit_schema_name): + if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)): + _print("Unable to determine schema name for JSON data file: {inserts_file}") + return False + with temporary_directory() as tmpdir: + file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json") + with io.open(file_name, "w") as f: + json.dump(data, f) + return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=explicit_schema_name, + verbose=verbose, debug=debug, noprogress=noprogress) + elif isinstance(data, dict): + _print("DICT IN FILE FOR LOAD NOT YET SUPPPORTED") + if not _is_schema_name_list(portal, schema_names := list(data.keys())): + _print(f"Unrecognized types in JSON data file: {inserts_file}") + return False + with temporary_directory() as tmpdir: + nfiles = 0 + for schema_name in schema_names: + if not isinstance(schema_data := data[schema_name], list): + _print(f"Unexpected value for data type ({schema_name})" + f" in JSON data file: {inserts_file} ▶ ignoring") + continue + file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json") + with io.open(file_name, "w") as f: + json.dump(schema_data, f) + nfiles += 1 + if nfiles > 0: + return _load_data(load=tmpdir, ini_file=ini_file, + verbose=verbose, debug=debug, noprogress=noprogress) + # TODO + return True + else: + _print(f"Unrecognized JSON data in file: {inserts_file}") + return False + return True if verbose: _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}") _print(f"Portal INI file for load is: {ini_file}") @@ -511,7 +555,7 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> copy_to_temporary_directory = True if not schema_names_to_load: _print("Directory contains no valid data: {inserts_directory}") - return + return False if copy_to_temporary_directory: with temporary_directory() as tmpdir: if debug: @@ -526,8 +570,17 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load) if verbose: _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}") + return True +def _is_schema_name_list(portal: Portal, keys: list) -> bool: + if isinstance(keys, list): + for key in keys: + if portal.get_schema(key) is None: + return False + return True + return False + def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict: ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE if isinstance(ignore, list): @@ -603,13 +656,15 @@ def _parse_delete_fields(value: str) -> str: def _get_schema_name_from_schema_named_json_file_name(portal: Portal, value: str) -> Optional[str]: - try: - if not value.endswith(".json"): - return None - _, schema_name = _get_schema(portal, os.path.basename(value[:-5])) - return schema_name - except Exception: - return False + if isinstance(value, str) and value: + try: + if value.endswith(".json"): + value = value[:-5] + _, schema_name = _get_schema(portal, os.path.basename(value)) + return schema_name + except Exception: + pass + return False @lru_cache(maxsize=1) diff --git a/pyproject.toml b/pyproject.toml index cfb52d4e4..4e455a9ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b10" # TODO: To become 8.14.1 +version = "8.14.0.1b11" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a7baf95f5e7f45cf6ed0f168473492afcc40f395 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sat, 17 Aug 2024 23:34:32 -0400 Subject: [PATCH 13/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index e3ccd7479..8d11003d2 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -485,7 +485,7 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> with io.open(inserts_file, "r") as f: try: data = json.load(f) - except Exception as e: + except Exception: _print(f"Cannot load JSON data from file: {inserts_file}") return False if isinstance(data, list): @@ -509,7 +509,7 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> for schema_name in schema_names: if not isinstance(schema_data := data[schema_name], list): _print(f"Unexpected value for data type ({schema_name})" - f" in JSON data file: {inserts_file} ▶ ignoring") + f" in JSON data file: {inserts_file} ▶ ignoring") continue file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json") with io.open(file_name, "w") as f: @@ -581,6 +581,7 @@ def _is_schema_name_list(portal: Portal, keys: list) -> bool: return True return False + def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[List[str]] = None) -> dict: ignore_these_properties = [] if noignore is True else _IGNORE_PROPERTIES_ON_UPDATE if isinstance(ignore, list): From 0624763ffc328cd3dc1106cc16d542ce28f399a9 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 01:49:32 -0400 Subject: [PATCH 14/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 22 ++++++++++++++-------- pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 8d11003d2..71e1c0418 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -401,7 +401,8 @@ def _upsert_data(portal: Portal, data: dict, schema_name: str, def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None, - verbose: bool = False, debug: bool = False, noprogress: bool = False) -> bool: + verbose: bool = False, debug: bool = False, noprogress: bool = False, + _portal: Optional[Portal] = None, _single_insert_file: Optional[str] = None) -> bool: from snovault.loadxl import load_all_gen, LoadGenWrapper from dcicutils.captured_output import captured_output @@ -411,7 +412,7 @@ def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict): nonlocal LoadGenWrapper, load_all_gen, verbose, debug progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes - progress_bar = ProgressBar(progress_total) if not noprogress else None + progress_bar = ProgressBar(progress_total, interrupt_exit=True) if not noprogress else None def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> str: if not isinstance(encoding, str): @@ -477,9 +478,9 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> inserts_directory = None inserts_file = load - portal = None - with captured_output(not debug): - portal = Portal(ini_file) + if not (portal := _portal): + with captured_output(not debug): + portal = Portal(ini_file) if inserts_file: with io.open(inserts_file, "r") as f: @@ -498,7 +499,8 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> with io.open(file_name, "w") as f: json.dump(data, f) return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=explicit_schema_name, - verbose=verbose, debug=debug, noprogress=noprogress) + verbose=verbose, debug=debug, noprogress=noprogress, + _portal=portal, _single_insert_file=inserts_file) elif isinstance(data, dict): _print("DICT IN FILE FOR LOAD NOT YET SUPPPORTED") if not _is_schema_name_list(portal, schema_names := list(data.keys())): @@ -517,7 +519,8 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> nfiles += 1 if nfiles > 0: return _load_data(load=tmpdir, ini_file=ini_file, - verbose=verbose, debug=debug, noprogress=noprogress) + verbose=verbose, debug=debug, noprogress=noprogress, + _portal=portal, _single_insert_file=inserts_file) # TODO return True else: @@ -525,7 +528,10 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> return False return True if verbose: - _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}") + if _single_insert_file: + _print(f"Loading data file into Portal (via snovault.loadxl) from: {_single_insert_file}") + else: + _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}") _print(f"Portal INI file for load is: {ini_file}") schema_names = list(_get_schemas(portal).keys()) diff --git a/pyproject.toml b/pyproject.toml index 4e455a9ff..d4ec91f70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b11" # TODO: To become 8.14.1 +version = "8.14.0.1b12" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 66386b25d2909202721c62202e0f681967b1e1fd Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 10:07:05 -0400 Subject: [PATCH 15/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 39 ++++++++++++++++------- pyproject.toml | 2 +- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 71e1c0418..1dd2e7c68 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -123,7 +123,8 @@ def main(): parser.add_argument("--post", type=str, required=False, default=None, help="POST data.") parser.add_argument("--patch", type=str, required=False, default=None, help="PATCH data.") parser.add_argument("--upsert", type=str, required=False, default=None, help="Upsert data.") - parser.add_argument("--load", type=str, required=False, default=None, help="Load data via snovault.loadxl.") + parser.add_argument("--load", "--loadxl", type=str, required=False, default=None, + help="Load data via snovault.loadxl.") parser.add_argument("--ini", type=str, required=False, default=None, help="INI file for data via snovault.loadxl.") parser.add_argument("--delete", type=str, required=False, default=None, help="Delete data.") parser.add_argument("--purge", type=str, required=False, default=None, help="Purge data.") @@ -159,11 +160,12 @@ def usage(message: Optional[str] = None) -> None: "when using the --load option (to load data via snovault.loadxl).") exit(1) if args.env: - _print("The --env is not used for the --load option (to load data via snovault.loadxl).") - if args.schema: - _print("The --schema is not used for the --load option (to load data via snovault.loadxl).") - _load_data(load=args.load, ini_file=args.ini, - verbose=args.verbose, debug=args.debug, noprogress=args.noprogress) + if args.ini: + _print("The --env is not used for the --load option (to load data via snovault.loadxl).") + args.ini = args.env + if not _load_data(load=args.load, ini_file=args.ini, explicit_schema_name=args.schema, + verbose=args.verbose, debug=args.debug, noprogress=args.noprogress): + exit(1) exit(0) portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug) @@ -171,7 +173,7 @@ def usage(message: Optional[str] = None) -> None: if explicit_schema_name := args.schema: schema, explicit_schema_name = _get_schema(portal, explicit_schema_name) if not schema: - usage(f"ERROR: Unknown schema name: {args.schema}") + usage(f"Unknown specified schema name: {args.schema}") if args.post: _post_or_patch_or_upsert(portal=portal, @@ -494,15 +496,25 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)): _print("Unable to determine schema name for JSON data file: {inserts_file}") return False + elif not (schema_name := _get_schema(portal, explicit_schema_name)[1]): + _print(f"Unknown specified schema name: {explicit_schema_name}") + return False with temporary_directory() as tmpdir: file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json") with io.open(file_name, "w") as f: json.dump(data, f) - return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=explicit_schema_name, + return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name, verbose=verbose, debug=debug, noprogress=noprogress, _portal=portal, _single_insert_file=inserts_file) elif isinstance(data, dict): - _print("DICT IN FILE FOR LOAD NOT YET SUPPPORTED") + if schema_name := explicit_schema_name: + if _is_schema_name_list(portal, schema_names := list(data.keys())): + _print(f"Ignoring specify --schema: {schema_name}") + elif not (schema_name := _get_schema(portal, schema_name)[1]): + _print(f"Unknown specified schema name: {explicit_schema_name}") + return False + else: + data = {schema_name: [data]} if not _is_schema_name_list(portal, schema_names := list(data.keys())): _print(f"Unrecognized types in JSON data file: {inserts_file}") return False @@ -529,9 +541,9 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> return True if verbose: if _single_insert_file: - _print(f"Loading data file into Portal (via snovault.loadxl) from: {_single_insert_file}") + _print(f"Loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") else: - _print(f"Loading data files into Portal (via snovault.loadxl) from: {inserts_directory}") + _print(f"Loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") _print(f"Portal INI file for load is: {ini_file}") schema_names = list(_get_schemas(portal).keys()) @@ -575,7 +587,10 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> else: loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load) if verbose: - _print(f"Done loading data into Portal (via snovault.loadxl) files from: {inserts_directory}") + if _single_insert_file: + _print(f"Done loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") + else: + _print(f"Done loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") return True diff --git a/pyproject.toml b/pyproject.toml index d4ec91f70..b4a6fa308 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b12" # TODO: To become 8.14.1 +version = "8.14.0.1b13" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 7d612b75d799f7efb47d6feff8f6050ce0e4947d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 10:15:08 -0400 Subject: [PATCH 16/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 1 - pyproject.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 1dd2e7c68..529b4b69f 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -533,7 +533,6 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> return _load_data(load=tmpdir, ini_file=ini_file, verbose=verbose, debug=debug, noprogress=noprogress, _portal=portal, _single_insert_file=inserts_file) - # TODO return True else: _print(f"Unrecognized JSON data in file: {inserts_file}") diff --git a/pyproject.toml b/pyproject.toml index b4a6fa308..272657a80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b13" # TODO: To become 8.14.1 +version = "8.14.0.1b14" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a0ce93f0356ac5fd73e16dfb238f50848f37067c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 10:38:32 -0400 Subject: [PATCH 17/36] Minor changes to utility/troubleshooting/convenience scripts view-portal-object and update-portal-object. --- dcicutils/scripts/update_portal_object.py | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 529b4b69f..227c1b402 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -462,14 +462,14 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> if not ini_file: ini_file = _DEFAULT_INI_FILE_FOR_LOAD - if not os.path.isabs(ini_file := os.path.expanduser(ini_file)): - ini_file = os.path.join(os.getcwd(), ini_file) + if not os.path.isabs(ini_file := os.path.normpath(os.path.expanduser(ini_file))): + ini_file = os.path.normpath(os.path.join(os.getcwd(), ini_file)) if not os.path.exists(ini_file): _print(f"The INI file required for --load is not found: {ini_file}") exit(1) - if not os.path.isabs(load := os.path.expanduser(load)): - load = os.path.join(os.getcwd(), load) + if not os.path.isabs(load := os.path.normpath(os.path.expanduser(load))): + load = os.path.normpath(os.path.join(os.getcwd(), load)) if not os.path.exists(load): return False diff --git a/pyproject.toml b/pyproject.toml index 272657a80..ae1166401 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b14" # TODO: To become 8.14.1 +version = "8.14.0.1b15" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 2054a39b33a69523cdcadb630bc9febabeef1f54 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 14:03:19 -0400 Subject: [PATCH 18/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 148 +++++++++++++--------- pyproject.toml | 2 +- 2 files changed, 89 insertions(+), 61 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 227c1b402..0b7c15b06 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -17,8 +17,9 @@ import shutil import sys from typing import Callable, List, Optional, Tuple, Union +from dcicutils.captured_output import captured_output from dcicutils.command_utils import yes_or_no -from dcicutils.common import ORCHESTRATED_APPS, APP_SMAHT +from dcicutils.common import ORCHESTRATED_APPS, APP_CGAP, APP_FOURFRONT, APP_SMAHT from dcicutils.ff_utils import delete_metadata, purge_metadata from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case from dcicutils.portal_utils import Portal as PortalFromUtils @@ -145,30 +146,16 @@ def usage(message: Optional[str] = None) -> None: parser.print_help() sys.exit(1) - if app := args.app: - if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS): - usage(f"ERROR: Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}") - else: - app = APP_SMAHT - if not (args.post or args.patch or args.upsert or args.delete or args.purge or args.load): usage() + if not (portal := _create_portal(env=args.env, ini=args.ini, app=args.app, load=args.load, + verbose=args.verbose, debug=args.debug, quiet=args.quiet)): + exit(1) + if args.load: - if args.post or args.patch or args.upsert or args.delete or args.purge: - _print("Cannot use any other update option" - "when using the --load option (to load data via snovault.loadxl).") - exit(1) - if args.env: - if args.ini: - _print("The --env is not used for the --load option (to load data via snovault.loadxl).") - args.ini = args.env - if not _load_data(load=args.load, ini_file=args.ini, explicit_schema_name=args.schema, - verbose=args.verbose, debug=args.debug, noprogress=args.noprogress): - exit(1) - exit(0) - - portal = _create_portal(env=args.env, app=app, verbose=args.verbose, debug=args.debug) + _load_data(portal=portal, load=args.load, ini_file=args.ini, explicit_schema_name=args.schema, + verbose=args.verbose, debug=args.debug, noprogress=args.noprogress) if explicit_schema_name := args.schema: schema, explicit_schema_name = _get_schema(portal, explicit_schema_name) @@ -402,17 +389,18 @@ def _upsert_data(portal: Portal, data: dict, schema_name: str, return -def _load_data(load: str, ini_file: str, explicit_schema_name: Optional[str] = None, +def _load_data(portal: Portal, load: str, ini_file: str, explicit_schema_name: Optional[str] = None, verbose: bool = False, debug: bool = False, noprogress: bool = False, - _portal: Optional[Portal] = None, _single_insert_file: Optional[str] = None) -> bool: + _single_insert_file: Optional[str] = None) -> bool: from snovault.loadxl import load_all_gen, LoadGenWrapper - from dcicutils.captured_output import captured_output from dcicutils.progress_bar import ProgressBar + loadxl_summary = {} + def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict): - nonlocal LoadGenWrapper, load_all_gen, verbose, debug + nonlocal LoadGenWrapper, load_all_gen, loadxl_summary, verbose, debug progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes progress_bar = ProgressBar(progress_total, interrupt_exit=True) if not noprogress else None @@ -452,6 +440,9 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> if progress_bar: progress_bar.set_description(f"▶ {to_camel_case(current_item_type)}: {action}") current_item_count += 1 + if loadxl_summary.get(current_item_type, None) is None: + loadxl_summary[current_item_type] = 0 + loadxl_summary[current_item_type] += 1 if progress_bar: progress_bar.set_progress(total_item_count) elif debug: @@ -460,14 +451,9 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> progress_bar.set_description("▶ Load Complete") print() - if not ini_file: - ini_file = _DEFAULT_INI_FILE_FOR_LOAD - if not os.path.isabs(ini_file := os.path.normpath(os.path.expanduser(ini_file))): - ini_file = os.path.normpath(os.path.join(os.getcwd(), ini_file)) - if not os.path.exists(ini_file): - _print(f"The INI file required for --load is not found: {ini_file}") - exit(1) - + if not portal.vapp: + _print("Must using INI based Portal object with --load (use --ini option to specify an INI file).") + return False if not os.path.isabs(load := os.path.normpath(os.path.expanduser(load))): load = os.path.normpath(os.path.join(os.getcwd(), load)) if not os.path.exists(load): @@ -480,10 +466,6 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> inserts_directory = None inserts_file = load - if not (portal := _portal): - with captured_output(not debug): - portal = Portal(ini_file) - if inserts_file: with io.open(inserts_file, "r") as f: try: @@ -503,9 +485,9 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> file_name = os.path.join(tmpdir, f"{to_snake_case(schema_name)}.json") with io.open(file_name, "w") as f: json.dump(data, f) - return _load_data(load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name, + return _load_data(portal=portal, load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name, verbose=verbose, debug=debug, noprogress=noprogress, - _portal=portal, _single_insert_file=inserts_file) + _single_insert_file=inserts_file) elif isinstance(data, dict): if schema_name := explicit_schema_name: if _is_schema_name_list(portal, schema_names := list(data.keys())): @@ -530,20 +512,20 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> json.dump(schema_data, f) nfiles += 1 if nfiles > 0: - return _load_data(load=tmpdir, ini_file=ini_file, + return _load_data(portal=portal, load=tmpdir, ini_file=ini_file, verbose=verbose, debug=debug, noprogress=noprogress, - _portal=portal, _single_insert_file=inserts_file) + _single_insert_file=inserts_file) return True else: _print(f"Unrecognized JSON data in file: {inserts_file}") return False return True + if verbose: if _single_insert_file: _print(f"Loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") else: _print(f"Loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") - _print(f"Portal INI file for load is: {ini_file}") schema_names = list(_get_schemas(portal).keys()) schema_snake_case_names = [to_snake_case(item) for item in schema_names] @@ -585,11 +567,15 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> loadxl(portal=portal, inserts_directory=tmpdir, schema_names_to_load=schema_names_to_load) else: loadxl(portal=portal, inserts_directory=inserts_directory, schema_names_to_load=schema_names_to_load) + if verbose: if _single_insert_file: _print(f"Done loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") else: _print(f"Done loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") + for item in sorted(loadxl_summary.keys()): + _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") + return True @@ -611,25 +597,67 @@ def _prune_data_for_update(data: dict, noignore: bool = False, ignore: Optional[ return {key: value for key, value in data.items() if key not in ignore_these_properties} -def _create_portal(env: Optional[str] = None, app: Optional[str] = None, - verbose: bool = False, debug: bool = False) -> Optional[Portal]: +def _create_portal(env: Optional[str] = None, ini: Optional[str] = None, app: Optional[str] = None, + load: Optional[str] = None, verbose: bool = False, debug: bool = False, + quiet: bool = False) -> Optional[Portal]: + + if app: + if (app not in ORCHESTRATED_APPS) and ((app := app.lower()) not in ORCHESTRATED_APPS): + _print(f"Unknown app name; must be one of: {' | '.join(ORCHESTRATED_APPS)}") + return None + elif APP_SMAHT in (env or os.environ.get(_SMAHT_ENV_ENVIRON_NAME) or ""): + app = APP_SMAHT + elif APP_CGAP in (env or ""): + app = APP_CGAP + elif APP_FOURFRONT in (env or ""): + app = APP_FOURFRONT + + if ini: + if env: + if not quiet: + _print("Ignoring --env option when --ini option is given.") + elif (app == _SMAHT_ENV_ENVIRON_NAME) and (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)): + if not quiet: + _print(f"Ignoring SMAHT_ENV environment variable ({env}) when --ini option is given.") + if not os.path.isabs(ini_file := os.path.normpath(os.path.expanduser(ini))): + ini_file = os.path.normpath(os.path.join(os.getcwd(), ini_file)) + if not os.path.exists(ini_file): + _print(f"Specified Portal INI file not found: {ini_file}") + return None + with captured_output(not debug): + if not (portal := Portal(ini_file, app=app)): + _print(f"Cannot create INI based Portal object: {env} ({app})") + return None + else: + env_from_environ = False + if not env and app: + # If the --load option is specified, and no --ini option is specified, then do NOT default + # to using the SMAHT_ENV environment variable (if set) for an access-key based Portal + # object; rather default to the default INI file (i.e. development.ini). + if (not load) and (app == APP_SMAHT) and (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)): + env_from_environ = True + if not env: + if os.path.exists(ini_file := os.path.normpath(os.path.join(os.getcwd(), _DEFAULT_INI_FILE_FOR_LOAD))): + return _create_portal(ini=ini_file, app=app, verbose=verbose, debug=debug) + return None + if not (portal := Portal(env, app=app) if env or app else None): + _print(f"Cannot create access-key based Portal object: {env}{f' ({app})' if app else ''}") + return None + + if (ini_file := portal.ini_file): + if not quiet: + _print(f"Portal environment: {ini_file}") + elif (env := portal.env) or (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)): + _print(f"Portal environment" + f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}") + if verbose: + if portal.keys_file: + _print(f"Portal keys file: {portal.keys_file}") + if portal.key_id: + _print(f"Portal key prefix: {portal.key_id[0:2]}******") + if portal.server: + _print(f"Portal server: {portal.server}") - env_from_environ = None - if not env and (app == APP_SMAHT): - if env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME): - env_from_environ = True - if not (portal := Portal(env, app=app) if env or app else None): - return None - if verbose: - if (env := portal.env) or (env := os.environ(_SMAHT_ENV_ENVIRON_NAME)): - _print(f"Portal environment" - f"{f' (from {_SMAHT_ENV_ENVIRON_NAME})' if env_from_environ else ''}: {portal.env}") - if portal.keys_file: - _print(f"Portal keys file: {portal.keys_file}") - if portal.key_id: - _print(f"Portal key prefix: {portal.key_id[0:2]}******") - if portal.server: - _print(f"Portal server: {portal.server}") return portal diff --git a/pyproject.toml b/pyproject.toml index ae1166401..172efa1c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b15" # TODO: To become 8.14.1 +version = "8.14.0.1b16" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 6bea41f89b7a1516f845cf6f5efc16fea5864a1c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 18:53:11 -0400 Subject: [PATCH 19/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 66 +++++++++++++++++++---- poetry.lock | 4 +- pyproject.toml | 2 +- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 0b7c15b06..55e119700 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -21,7 +21,7 @@ from dcicutils.command_utils import yes_or_no from dcicutils.common import ORCHESTRATED_APPS, APP_CGAP, APP_FOURFRONT, APP_SMAHT from dcicutils.ff_utils import delete_metadata, purge_metadata -from dcicutils.misc_utils import get_error_message, ignored, PRINT, to_camel_case, to_snake_case +from dcicutils.misc_utils import get_error_message, ignored, normalize_string, PRINT, to_camel_case, to_snake_case from dcicutils.portal_utils import Portal as PortalFromUtils from dcicutils.tmpfile_utils import temporary_directory @@ -393,14 +393,20 @@ def _load_data(portal: Portal, load: str, ini_file: str, explicit_schema_name: O verbose: bool = False, debug: bool = False, noprogress: bool = False, _single_insert_file: Optional[str] = None) -> bool: + import snovault.loadxl from snovault.loadxl import load_all_gen, LoadGenWrapper from dcicutils.progress_bar import ProgressBar loadxl_summary = {} + loadxl_unresolved = {} + loadxl_output = [] + loadxl_total_item_count = 0 + loadxl_total_error_count = 0 def loadxl(portal: Portal, inserts_directory: str, schema_names_to_load: dict): nonlocal LoadGenWrapper, load_all_gen, loadxl_summary, verbose, debug + nonlocal loadxl_total_item_count, loadxl_total_error_count progress_total = sum(schema_names_to_load.values()) * 2 # loadxl does two passes progress_bar = ProgressBar(progress_total, interrupt_exit=True) if not noprogress else None @@ -413,27 +419,54 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> return str_or_bytes.strip() return "" + def loadxl_print(arg): + if arg: + loadxl_output.append(normalize_string(str(arg))) + + snovault.loadxl.print = loadxl_print + LOADXL_RESPONSE_PATTERN = re.compile(r"^([A-Z]+):\s*([a-zA-Z\/\d_-]+)\s*(\S+)\s*(\S+)?\s*(.*)$") LOADXL_ACTION_NAME = {"POST": "Create", "PATCH": "Update", "SKIP": "Check", "CHECK": "Validate", "ERROR": "Error"} current_item_type = None current_item_count = 0 current_item_total = 0 - total_item_count = 0 + for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory, docsdir=None, overwrite=True, verbose=True)): - total_item_count += 1 + loadxl_total_item_count += 1 item = decode_bytes(item) match = LOADXL_RESPONSE_PATTERN.match(item) if not match or match.re.groups < 3: continue - action = LOADXL_ACTION_NAME[match.group(1).upper()] - # response_value = match.group(0) - # identifying_value = match.group(2) + if (action := LOADXL_ACTION_NAME[match.group(1).upper()]) == "Error": + identifying_value = match.group(2) + # Example message for unresolved link: + # ERROR: /22813a02-906b-4b60-b2b2-4afaea24aa28 Bad response: 422 Unprocessable Entity + # (not 200 OK or 3xx redirect for http://localhost/file_set?skip_indexing=true)b\'{"@type": + # ["ValidationFailure", "Error"], "status": "error", "code": # 422, "title": "Unprocessable Entity", + # "description": "Failed validation", "errors": [{"location": "body", "name": # "Schema: ", + # "description": "Unable to resolve link: /Library/a4e8f79f-4d47-4e85-9707-c343c940a315"}, + # {"location": "body", "name": "Schema: libraries.0", + # "description": "\\\'a4e8f79f-4d47-4e85-9707-c343c940a315\\\' not found"}]}\' + unresolved_link_error_message_prefix = "Unable to resolve link:" + if (i := item.find(unresolved_link_error_message_prefix)) > 0: + unresolved_link = item[i + len(unresolved_link_error_message_prefix):].strip() + if (i := unresolved_link.find("\"")) > 0: + if (unresolved_link := unresolved_link[0:i]): + if ((error_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and + (len(error_type.groups()) == 1)): # noqa + error_type = to_camel_case(error_type.group(1)) + identifying_value = f"/{error_type}{identifying_value}" + if not loadxl_unresolved.get(identifying_value): + loadxl_unresolved[identifying_value] = [] + loadxl_unresolved[identifying_value].append(unresolved_link) + loadxl_total_error_count += 1 + continue item_type = match.group(3) if current_item_type != item_type: if noprogress and debug and current_item_type is not None: - print() + _print() current_item_type = item_type current_item_count = 0 current_item_total = schema_names_to_load[item_type] @@ -444,12 +477,13 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> loadxl_summary[current_item_type] = 0 loadxl_summary[current_item_type] += 1 if progress_bar: - progress_bar.set_progress(total_item_count) + progress_bar.set_progress(loadxl_total_item_count) elif debug: - print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})") + _print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})") if progress_bar: progress_bar.set_description("▶ Load Complete") - print() + if loadxl_total_item_count > loadxl_total_error_count: + _print() if not portal.vapp: _print("Must using INI based Portal object with --load (use --ini option to specify an INI file).") @@ -573,8 +607,20 @@ def decode_bytes(str_or_bytes: Union[str, bytes], *, encoding: str = "utf-8") -> _print(f"Done loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") else: _print(f"Done loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") + _print(f"Total items loaded: {loadxl_total_item_count}" + f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") + if loadxl_unresolved: + _print("✗ Unresolved references:") + for item in loadxl_unresolved: + _print(f" ▶ {item}: {len(loadxl_unresolved[item])}") + for subitem in loadxl_unresolved[item]: + _print(f" ▷ {subitem}") + if debug and loadxl_output: + _print("✗ Output from loadxl:") + for item in loadxl_output: + _print(f" ▶ {item}") return True diff --git a/poetry.lock b/poetry.lock index 0f853563a..ad7f922a2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1994,7 +1994,7 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2282,4 +2282,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "45bd3f78d7d134a4e8ec30f529e286d963e5612daea5287174a43d1d3069afc2" +content-hash = "bb78e9c396c24c7df9ab1768d13cd979909c31edcdf796bebbb28bb07a5720a6" diff --git a/pyproject.toml b/pyproject.toml index 172efa1c0..f931866ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b16" # TODO: To become 8.14.1 +version = "8.14.0.1b17" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From f8007b432d4a8a20575c25d0e5f273e4fba1a358 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 18:59:25 -0400 Subject: [PATCH 20/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 55e119700..8610ef400 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -433,7 +433,7 @@ def loadxl_print(arg): current_item_total = 0 for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory, - docsdir=None, overwrite=True, verbose=True)): + docsdir=None, overwrite=True, verbose=True, continue_on_exception=True)): loadxl_total_item_count += 1 item = decode_bytes(item) match = LOADXL_RESPONSE_PATTERN.match(item) diff --git a/pyproject.toml b/pyproject.toml index f931866ca..647bceeb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b17" # TODO: To become 8.14.1 +version = "8.14.0.1b18" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 0b767a4fac992fb3050f2a9bd05ee98253b7b204 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 18:59:45 -0400 Subject: [PATCH 21/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 8610ef400..f77878868 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -433,7 +433,8 @@ def loadxl_print(arg): current_item_total = 0 for item in LoadGenWrapper(load_all_gen(testapp=portal.vapp, inserts=inserts_directory, - docsdir=None, overwrite=True, verbose=True, continue_on_exception=True)): + docsdir=None, overwrite=True, verbose=True, + continue_on_exception=True)): loadxl_total_item_count += 1 item = decode_bytes(item) match = LOADXL_RESPONSE_PATTERN.match(item) From 6df4e43389f2398844b40004c4d28aab7679827d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Sun, 18 Aug 2024 23:35:35 -0400 Subject: [PATCH 22/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 44 +++++++++++++++-------- pyproject.toml | 2 +- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index f77878868..3db1ddd6e 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -441,8 +441,11 @@ def loadxl_print(arg): if not match or match.re.groups < 3: continue if (action := LOADXL_ACTION_NAME[match.group(1).upper()]) == "Error": + loadxl_total_error_count += 1 identifying_value = match.group(2) - # Example message for unresolved link: + # + # Example message for unresolved link ... + # # ERROR: /22813a02-906b-4b60-b2b2-4afaea24aa28 Bad response: 422 Unprocessable Entity # (not 200 OK or 3xx redirect for http://localhost/file_set?skip_indexing=true)b\'{"@type": # ["ValidationFailure", "Error"], "status": "error", "code": # 422, "title": "Unprocessable Entity", @@ -450,21 +453,33 @@ def loadxl_print(arg): # "description": "Unable to resolve link: /Library/a4e8f79f-4d47-4e85-9707-c343c940a315"}, # {"location": "body", "name": "Schema: libraries.0", # "description": "\\\'a4e8f79f-4d47-4e85-9707-c343c940a315\\\' not found"}]}\' + # + # OR ... + # + # ERROR: /22813a02-906b-4b60-b2b2-4afaea24aa28 Bad response: 404 Not Found (not 200 OK or 3xx + # redirect for http://localhost/22813a02-906b-4b60-b2b2-4afaea24aa28)b\'{"@type": ["HTTPNotFound", + # "Error"], "status": "error", "code": 404, "title": "Not Found", "description": "The resource + # could not be found.", "detail": "debug_notfound of url http://localhost/22813a02-906b-4b60-b2b2-4afaea24aa28; # noqa + # path_info: \\\'/22813a02-906b-4b60-b2b2-4afaea24aa28\\\', context: , # noqa + # view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root: + # , vroot: , vroot_path: ()"}\' # noqa + # + if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1): # noqa + item_type = to_snake_case(item_type.group(1)) + identifying_value = f"/{to_camel_case(item_type)}{identifying_value}" unresolved_link_error_message_prefix = "Unable to resolve link:" if (i := item.find(unresolved_link_error_message_prefix)) > 0: unresolved_link = item[i + len(unresolved_link_error_message_prefix):].strip() if (i := unresolved_link.find("\"")) > 0: if (unresolved_link := unresolved_link[0:i]): - if ((error_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and - (len(error_type.groups()) == 1)): # noqa - error_type = to_camel_case(error_type.group(1)) - identifying_value = f"/{error_type}{identifying_value}" - if not loadxl_unresolved.get(identifying_value): - loadxl_unresolved[identifying_value] = [] - loadxl_unresolved[identifying_value].append(unresolved_link) - loadxl_total_error_count += 1 - continue - item_type = match.group(3) + if not loadxl_unresolved.get(unresolved_link): + loadxl_unresolved[unresolved_link] = [] + if identifying_value not in loadxl_unresolved[unresolved_link]: + loadxl_unresolved[unresolved_link].append(identifying_value) + if not item_type: + continue + else: + item_type = match.group(3) if current_item_type != item_type: if noprogress and debug and current_item_type is not None: _print() @@ -483,6 +498,7 @@ def loadxl_print(arg): _print(f"{current_item_type}: {current_item_count} or {current_item_total} ({action})") if progress_bar: progress_bar.set_description("▶ Load Complete") + progress_bar.set_progress(progress_total) if loadxl_total_item_count > loadxl_total_error_count: _print() @@ -608,16 +624,16 @@ def loadxl_print(arg): _print(f"Done loading data into Portal (via snovault.loadxl) from file: {_single_insert_file}") else: _print(f"Done loading data into Portal (via snovault.loadxl) from directory: {inserts_directory}") - _print(f"Total items loaded: {loadxl_total_item_count}" + _print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") if loadxl_unresolved: _print("✗ Unresolved references:") for item in loadxl_unresolved: - _print(f" ▶ {item}: {len(loadxl_unresolved[item])}") + _print(f" ✗ {item}: {len(loadxl_unresolved[item])}") for subitem in loadxl_unresolved[item]: - _print(f" ▷ {subitem}") + _print(f" ▶ {subitem}") if debug and loadxl_output: _print("✗ Output from loadxl:") for item in loadxl_output: diff --git a/pyproject.toml b/pyproject.toml index 647bceeb9..01ebd0570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b18" # TODO: To become 8.14.1 +version = "8.14.0.1b19" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 0849b627cbb45da4b7118c612fea05a79eef7fdb Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 08:34:41 -0400 Subject: [PATCH 23/36] comments --- dcicutils/scripts/update_portal_object.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 3db1ddd6e..98d71f2ca 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -2,9 +2,16 @@ # Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront. # ------------------------------------------------------------------------------------------------------ # Example commands: -# update-portal-object --post file_format.json -# update-portal-object --upsert directory-with-schema-named-dot-json-files -# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads +# +# update-portal-object --load {json-file | directory-with-json-files} +# update-portal-object --post {json-file | directory-with-json-files} +# update-portal-object --upsert {json-file | directory-with-json-files} +# update-portal-object --patch {json-file | directory-with-json-files} +# +# The specified json-file or file withing directory-with-jaon-files must be JSON containing either +# a list of objects, which which case the file name for the target schema name, or if not, then +# the --schema option must be used to specified the target schema; or the JSON must be a dictionary +# of schema names, where the value of each is a list of objects for that schema. # -------------------------------------------------------------------------------------------------- import argparse From d144be45f3581445117202142eecb834f2eb470f Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 08:45:21 -0400 Subject: [PATCH 24/36] comments --- dcicutils/scripts/update_portal_object.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 98d71f2ca..8c437fc78 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -634,7 +634,7 @@ def loadxl_print(arg): _print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): - _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") + _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}") # TODO: straightend out this arithmetic if loadxl_unresolved: _print("✗ Unresolved references:") for item in loadxl_unresolved: diff --git a/pyproject.toml b/pyproject.toml index 01ebd0570..e9a0c51a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b19" # TODO: To become 8.14.1 +version = "8.14.0.1b20" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 4dced61e63ee53f3cf2e0d61a4f5e42f1ae4130c Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:06:02 -0400 Subject: [PATCH 25/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 8c437fc78..fb8557569 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -2,16 +2,9 @@ # Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront. # ------------------------------------------------------------------------------------------------------ # Example commands: -# -# update-portal-object --load {json-file | directory-with-json-files} -# update-portal-object --post {json-file | directory-with-json-files} -# update-portal-object --upsert {json-file | directory-with-json-files} -# update-portal-object --patch {json-file | directory-with-json-files} -# -# The specified json-file or file withing directory-with-jaon-files must be JSON containing either -# a list of objects, which which case the file name for the target schema name, or if not, then -# the --schema option must be used to specified the target schema; or the JSON must be a dictionary -# of schema names, where the value of each is a list of objects for that schema. +# update-portal-object --post file_format.json +# update-portal-object --upsert directory-with-schema-named-dot-json-files +# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads # -------------------------------------------------------------------------------------------------- import argparse @@ -471,7 +464,8 @@ def loadxl_print(arg): # view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root: # , vroot: , vroot_path: ()"}\' # noqa # - if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1): # noqa + if ((item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and + (len(item_type.groups()) == 1)): # noqa item_type = to_snake_case(item_type.group(1)) identifying_value = f"/{to_camel_case(item_type)}{identifying_value}" unresolved_link_error_message_prefix = "Unable to resolve link:" @@ -634,7 +628,7 @@ def loadxl_print(arg): _print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): - _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}") # TODO: straightend out this arithmetic + _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") if loadxl_unresolved: _print("✗ Unresolved references:") for item in loadxl_unresolved: From a895507bf68ef525df75a10d943e89b167b2edb1 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:06:12 -0400 Subject: [PATCH 26/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e9a0c51a7..9c900b43a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b20" # TODO: To become 8.14.1 +version = "8.14.0.1b21" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 720e68ff3fe8224838d82617ba60ebdafe3aedaa Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:14:00 -0400 Subject: [PATCH 27/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 18 +++++++---- dcicutils/scripts/view_portal_object.py | 39 +++++++++++++++++++---- pyproject.toml | 2 +- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index fb8557569..8c437fc78 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -2,9 +2,16 @@ # Command-line utility to update (post, patch, upsert) portal objects for SMaHT/CGAP/Fourfront. # ------------------------------------------------------------------------------------------------------ # Example commands: -# update-portal-object --post file_format.json -# update-portal-object --upsert directory-with-schema-named-dot-json-files -# update-portal-object --patch file-not-named-for-schema-name.json --schema UnalignedReads +# +# update-portal-object --load {json-file | directory-with-json-files} +# update-portal-object --post {json-file | directory-with-json-files} +# update-portal-object --upsert {json-file | directory-with-json-files} +# update-portal-object --patch {json-file | directory-with-json-files} +# +# The specified json-file or file withing directory-with-jaon-files must be JSON containing either +# a list of objects, which which case the file name for the target schema name, or if not, then +# the --schema option must be used to specified the target schema; or the JSON must be a dictionary +# of schema names, where the value of each is a list of objects for that schema. # -------------------------------------------------------------------------------------------------- import argparse @@ -464,8 +471,7 @@ def loadxl_print(arg): # view_name: \\\'22813a02-906b-4b60-b2b2-4afaea24aa28\\\', subpath: (), traversed: (), root: # , vroot: , vroot_path: ()"}\' # noqa # - if ((item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and - (len(item_type.groups()) == 1)): # noqa + if (item_type := re.search(r"https?://.*/(.*)\?skip_indexing=.*", item)) and (len(item_type.groups()) == 1): # noqa item_type = to_snake_case(item_type.group(1)) identifying_value = f"/{to_camel_case(item_type)}{identifying_value}" unresolved_link_error_message_prefix = "Unable to resolve link:" @@ -628,7 +634,7 @@ def loadxl_print(arg): _print(f"Total items loaded: {loadxl_total_item_count // 2}" # TODO: straightend out this arithmetic f"{f' (errors: {loadxl_total_error_count})' if loadxl_total_error_count else ''}") for item in sorted(loadxl_summary.keys()): - _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item]}") + _print(f"▷ {to_camel_case(item)}: {loadxl_summary[item] // 2}") # TODO: straightend out this arithmetic if loadxl_unresolved: _print("✗ Unresolved references:") for item in loadxl_unresolved: diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 1128d1539..4abe0ca01 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -290,6 +290,23 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq _print() return None + def get_metadata_types(path: str) -> Optional[dict]: + nonlocal portal, debug + metadata_types = {} + try: + if debug: + _print(f"Executing separted query to get actual metadata types for raw/inserts query.") + if ((response := portal.get(path)) and (response.status_code in [200, 307]) and + (response := response.json()) and (results := response.get("@graph"))): # noqa + for result in results: + if (result_type := result.get("@type")) and (result_uuid := result.get("uuid")): + if ((isinstance(result_type, list) and (result_type := result_type[0])) or + isinstance(result_type, str)): # noqa + metadata_types[result_uuid] = result_type + except Exception: + return None + return metadata_types + response = None try: if not uuid.startswith("/"): @@ -312,6 +329,7 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq if not response.json: _exit(f"Invalid JSON getting Portal object: {uuid}") response = response.json() + response_types = {} if inserts: # Format results as suitable for inserts (e.g. via update-portal-object). response.pop("schema_version", None) @@ -319,10 +337,12 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq (isinstance(results_type := response.get("@type"), list) and results_type) and (isinstance(results_type := results_type[0], str) and results_type.endswith("SearchResults")) and (results_type := results_type[0:-len("SearchResults")])): # noqa - # For search results, the type (from XyzSearchResults, above) may not be precisely correct for - # each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun); + # For (raw frame) search results, the type (from XyzSearchResults, above) may not be precisely correct + # for each of the results; it may be the supertype (e.g. QualityMetric vs QualityMetricWorkflowRun); # so for types which are supertypes (gotten via Portal.get_schemas_super_type_map) we actually - # lookup each result individually to determine its actual precise type. + # lookup each result individually to determine its actual precise type. Although, if we have + # more than (say) 5 results to do this for, then do a separate query (get_metadata_types) + # to get the result types all at once. if not ((supertypes := portal.get_schemas_super_type_map()) and (subtypes := supertypes.get(results_type))): subtypes = None response = {} @@ -335,9 +355,16 @@ def get_metadata_for_individual_result_type(uuid: str) -> Optional[dict]: # noq result.pop("schema_version", None) result = prune_data(result) if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and - (result_uuid := result.get("uuid")) and - (individual_result_type := get_metadata_for_individual_result_type(result_uuid))): # noqa - result_type = individual_result_type + (result_uuid := result.get("uuid"))): # noqa + # If we have more than (say) 5 results for which we need to determine that actual result type, + # then get them all at once via separate query (get_metadata_types)) which is not the raw frame. + if (results_total > 5) and (not response_types): + response_types = get_metadata_types(path) + if not (response_types and (result_type := response_types.get(result_uuid))): + if individual_result_type := get_metadata_for_individual_result_type(result_uuid): + result_type = individual_result_type + else: + result_type = results_type else: result_type = results_type if response.get(result_type): diff --git a/pyproject.toml b/pyproject.toml index 9c900b43a..3bade115e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b21" # TODO: To become 8.14.1 +version = "8.14.0.1b22" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 662e2938a33a514b9772dcfe713e9e663259e607 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:20:05 -0400 Subject: [PATCH 28/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/view_portal_object.py | 5 +++-- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 4abe0ca01..a5a7b4d61 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -294,8 +294,8 @@ def get_metadata_types(path: str) -> Optional[dict]: nonlocal portal, debug metadata_types = {} try: - if debug: - _print(f"Executing separted query to get actual metadata types for raw/inserts query.") + if verbose: + _print(f"Executing separted query to get actual metadata types for raw/inserts query.") if ((response := portal.get(path)) and (response.status_code in [200, 307]) and (response := response.json()) and (results := response.get("@graph"))): # noqa for result in results: @@ -356,6 +356,7 @@ def get_metadata_types(path: str) -> Optional[dict]: result = prune_data(result) if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and (result_uuid := result.get("uuid"))): # noqa + import pdb ; pdb.set_trace() # noqa # If we have more than (say) 5 results for which we need to determine that actual result type, # then get them all at once via separate query (get_metadata_types)) which is not the raw frame. if (results_total > 5) and (not response_types): diff --git a/pyproject.toml b/pyproject.toml index 3bade115e..f3d633f79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b22" # TODO: To become 8.14.1 +version = "8.14.0.1b23" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From db77ebd24e160c57855aadc72f37cdf1d1550a3b Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:21:46 -0400 Subject: [PATCH 29/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/view_portal_object.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index a5a7b4d61..6538c6398 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -356,7 +356,6 @@ def get_metadata_types(path: str) -> Optional[dict]: result = prune_data(result) if (subtypes and one_or_more_objects_of_types_exists(portal, subtypes, debug=debug) and (result_uuid := result.get("uuid"))): # noqa - import pdb ; pdb.set_trace() # noqa # If we have more than (say) 5 results for which we need to determine that actual result type, # then get them all at once via separate query (get_metadata_types)) which is not the raw frame. if (results_total > 5) and (not response_types): From 21e9d70153913ea91eb3c80198def0be6c1f8ff6 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:21:58 -0400 Subject: [PATCH 30/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f3d633f79..3b9b54ae8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b23" # TODO: To become 8.14.1 +version = "8.14.0.1b24" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a57454b1fa16b9da9124332d60930394657a0334 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 11:44:35 -0400 Subject: [PATCH 31/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 8c437fc78..e90be01e7 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -534,7 +534,7 @@ def loadxl_print(arg): if isinstance(data, list): if not (schema_name := explicit_schema_name): if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)): - _print("Unable to determine schema name for JSON data file: {inserts_file}") + _print(f"Unable to determine schema name for JSON data file: {inserts_file}") return False elif not (schema_name := _get_schema(portal, explicit_schema_name)[1]): _print(f"Unknown specified schema name: {explicit_schema_name}") diff --git a/pyproject.toml b/pyproject.toml index 3b9b54ae8..24175f1ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b24" # TODO: To become 8.14.1 +version = "8.14.0.1b25" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 52b5ec273bba7b26721f066e9ef0ca391376b7f4 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 13:24:55 -0400 Subject: [PATCH 32/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 7 ++++--- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index e90be01e7..4863fc840 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -707,9 +707,10 @@ def _create_portal(env: Optional[str] = None, ini: Optional[str] = None, app: Op if (not load) and (app == APP_SMAHT) and (env := os.environ.get(_SMAHT_ENV_ENVIRON_NAME)): env_from_environ = True if not env: - if os.path.exists(ini_file := os.path.normpath(os.path.join(os.getcwd(), _DEFAULT_INI_FILE_FOR_LOAD))): - return _create_portal(ini=ini_file, app=app, verbose=verbose, debug=debug) - return None + if not os.path.exists(ini_file := os.path.normpath(os.path.join(os.getcwd(), _DEFAULT_INI_FILE_FOR_LOAD))): + _print("Must specify --ini or --env option in order to create a Portal object.") + return None + return _create_portal(ini=ini_file, app=app, verbose=verbose, debug=debug) if not (portal := Portal(env, app=app) if env or app else None): _print(f"Cannot create access-key based Portal object: {env}{f' ({app})' if app else ''}") return None diff --git a/pyproject.toml b/pyproject.toml index 24175f1ad..a0ec694a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b25" # TODO: To become 8.14.1 +version = "8.14.0.1b26" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 8ed90d3c1387a63bf0180859e5d50b98286054e5 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 14:23:40 -0400 Subject: [PATCH 33/36] merge from main --- dcicutils/scripts/update_portal_object.py | 20 ++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index 4863fc840..d8c3f8fce 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -139,6 +139,8 @@ def main(): parser.add_argument("--noignore", action="store_true", required=False, default=False, help="Do not ignore standard fields on update(s).") parser.add_argument("--ignore", nargs="+", help="Ignore these additional fields.") + parser.add_argument("--unresolved-output", "--unresolved", type=str, + help="Output file to write unresolved references to for --load only.") parser.add_argument("--confirm", action="store_true", required=False, default=False, help="Confirm before action.") parser.add_argument("--verbose", action="store_true", required=False, default=False, help="Verbose output.") parser.add_argument("--quiet", action="store_true", required=False, default=False, help="Quiet output.") @@ -162,6 +164,7 @@ def usage(message: Optional[str] = None) -> None: if args.load: _load_data(portal=portal, load=args.load, ini_file=args.ini, explicit_schema_name=args.schema, + unresolved_output=args.unresolved_output, verbose=args.verbose, debug=args.debug, noprogress=args.noprogress) if explicit_schema_name := args.schema: @@ -397,6 +400,7 @@ def _upsert_data(portal: Portal, data: dict, schema_name: str, def _load_data(portal: Portal, load: str, ini_file: str, explicit_schema_name: Optional[str] = None, + unresolved_output: Optional[str] = False, verbose: bool = False, debug: bool = False, noprogress: bool = False, _single_insert_file: Optional[str] = None) -> bool: @@ -515,6 +519,7 @@ def loadxl_print(arg): if not os.path.isabs(load := os.path.normpath(os.path.expanduser(load))): load = os.path.normpath(os.path.join(os.getcwd(), load)) if not os.path.exists(load): + _print(f"Specified JSON data file not found: {load}") return False if os.path.isdir(load): @@ -544,6 +549,7 @@ def loadxl_print(arg): with io.open(file_name, "w") as f: json.dump(data, f) return _load_data(portal=portal, load=tmpdir, ini_file=ini_file, explicit_schema_name=schema_name, + unresolved_output=unresolved_output, verbose=verbose, debug=debug, noprogress=noprogress, _single_insert_file=inserts_file) elif isinstance(data, dict): @@ -571,6 +577,7 @@ def loadxl_print(arg): nfiles += 1 if nfiles > 0: return _load_data(portal=portal, load=tmpdir, ini_file=ini_file, + unresolved_output=unresolved_output, verbose=verbose, debug=debug, noprogress=noprogress, _single_insert_file=inserts_file) return True @@ -641,6 +648,19 @@ def loadxl_print(arg): _print(f" ✗ {item}: {len(loadxl_unresolved[item])}") for subitem in loadxl_unresolved[item]: _print(f" ▶ {subitem}") + if unresolved_output: + if unresolved_output: + if not os.path.isabs(unresolved_output := os.path.normpath(os.path.expanduser(unresolved_output))): + unresolved_output = os.path.normpath(os.path.join(os.getcwd(), unresolved_output)) + if os.path.exists(unresolved_output): + if os.path.isdir(unresolved_output): + _print("Unresolved output file exists as a directory: {unresolved_output}") + return False + _print(f"Unresolved output file already exists: {unresolved_output}") + if yes_or_no(f"Do you want to overwrite this file?"): + with io.open(unresolved_output, "w") as f: + for item in loadxl_unresolved: + f.write(f"{item}\n") if debug and loadxl_output: _print("✗ Output from loadxl:") for item in loadxl_output: diff --git a/pyproject.toml b/pyproject.toml index a0ec694a5..4ad528335 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b26" # TODO: To become 8.14.1 +version = "8.14.0.1b27" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From a18842aad4738228515719567a5e634eb9127ca3 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 14:39:32 -0400 Subject: [PATCH 34/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/update_portal_object.py | 7 +++++-- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dcicutils/scripts/update_portal_object.py b/dcicutils/scripts/update_portal_object.py index d8c3f8fce..27ffcef8b 100644 --- a/dcicutils/scripts/update_portal_object.py +++ b/dcicutils/scripts/update_portal_object.py @@ -562,8 +562,11 @@ def loadxl_print(arg): else: data = {schema_name: [data]} if not _is_schema_name_list(portal, schema_names := list(data.keys())): - _print(f"Unrecognized types in JSON data file: {inserts_file}") - return False + if not (schema_name := _get_schema_name_from_schema_named_json_file_name(portal, inserts_file)): + _print(f"Unrecognized types in JSON data file: {inserts_file}") + # Assume simple object of type from the JSON file name. + schema_names = [schema_name] + data = {schema_name: [data]} with temporary_directory() as tmpdir: nfiles = 0 for schema_name in schema_names: diff --git a/pyproject.toml b/pyproject.toml index 4ad528335..81c9f20d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b27" # TODO: To become 8.14.1 +version = "8.14.0.1b28" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 925e156025c6262451c287bf9c33ba8f181d305d Mon Sep 17 00:00:00 2001 From: David Michaels Date: Mon, 19 Aug 2024 17:57:32 -0400 Subject: [PATCH 35/36] Minor changes to utility/troubleshooting/convenience script view-portal-object. --- dcicutils/scripts/view_portal_object.py | 133 ++++++++++++++++-------- pyproject.toml | 2 +- 2 files changed, 92 insertions(+), 43 deletions(-) diff --git a/dcicutils/scripts/view_portal_object.py b/dcicutils/scripts/view_portal_object.py index 6538c6398..8696c94b6 100644 --- a/dcicutils/scripts/view_portal_object.py +++ b/dcicutils/scripts/view_portal_object.py @@ -307,28 +307,76 @@ def get_metadata_types(path: str) -> Optional[dict]: return None return metadata_types - response = None - try: - if not uuid.startswith("/"): - path = f"/{uuid}" - else: - path = uuid - response = portal.get(path, raw=raw or inserts, database=database) - except Exception as e: - if "404" in str(e) and "not found" in str(e).lower(): - _print(f"Portal object not found at {portal.server}: {uuid}") - _exit() - _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}") - if not response: - if check: - return None - _exit(f"Null response getting Portal object from {portal.server}: {uuid}") - if response.status_code not in [200, 307]: - # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above. - _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}") - if not response.json: - _exit(f"Invalid JSON getting Portal object: {uuid}") - response = response.json() + def write_insert_files(response: dict) -> None: + nonlocal insert_files, force + output_directory = insert_files if isinstance(insert_files, str) else os.getcwd() + for schema_name in response: + schema_data = response[schema_name] + file_name = f"{to_snake_case(schema_name)}.json" + file_path = os.path.join(output_directory, file_name) + message_verb = "Writing" + if os.path.exists(file_path): + message_verb = "Overwriting" + if os.path.isdir(file_path): + _print(f"WARNING: Output file already exists as a directory. SKIPPING: {file_path}") + continue + if not force: + _print(f"Output file already exists: {file_path}") + if not yes_or_no(f"Overwrite this file?"): + continue + if verbose: + _print(f"{message_verb} {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" + f" {len(schema_data)}) file: {file_path}") + with io.open(file_path, "w") as f: + json.dump(schema_data, f, indent=4) + + if os.path.exists(uuid) and inserts: + # Very special case: If given "uuid" (or other path) as actually a file then assume it + # contains a list of references (e.g. /Donor/3039a6ca-9849-432d-ad49-2c5630bcbee7) to fetch. + response = {} + if verbose: + _print(f"Reading references from file: {uuid}") + with io.open(uuid) as f: + for line in f: + if ((line := line.strip()) and (components := line.split("/")) and (len(components) > 1) and + (schema_name := components[1]) and (schema_name := _get_schema(portal, schema_name)[1])): # noqa + try: + if ((result := portal.get(line, raw=True, database=database)) and + (result.status_code in [200, 307]) and (result := result.json())): # noqa + if not response.get(schema_name): + response[schema_name] = [] + response[schema_name].append(result) + continue + except Exception: + pass + _print(f"Cannot get reference: {line}") + if insert_files: + write_insert_files(response) + return response + else: + response = None + try: + if not uuid.startswith("/"): + path = f"/{uuid}" + else: + path = uuid + response = portal.get(path, raw=raw or inserts, database=database) + except Exception as e: + if "404" in str(e) and "not found" in str(e).lower(): + _print(f"Portal object not found at {portal.server}: {uuid}") + _exit() + _exit(f"Exception getting Portal object from {portal.server}: {uuid}\n{get_error_message(e)}") + if not response: + if check: + return None + _exit(f"Null response getting Portal object from {portal.server}: {uuid}") + if response.status_code not in [200, 307]: + # TODO: Understand why the /me endpoint returns HTTP status code 307, which is only why we mention it above. + _exit(f"Invalid status code ({response.status_code}) getting Portal object from {portal.server}: {uuid}") + if not response.json: + _exit(f"Invalid JSON getting Portal object: {uuid}") + response = response.json() + response_types = {} if inserts: # Format results as suitable for inserts (e.g. via update-portal-object). @@ -376,26 +424,27 @@ def get_metadata_types(path: str) -> Optional[dict]: (isinstance(response_type := response_cooked.json().get("@type"), list) and response_type)): response = {f"{response_type[0]}": [prune_data(response)]} if insert_files: - output_directory = insert_files if isinstance(insert_files, str) else os.getcwd() - for schema_name in response: - schema_data = response[schema_name] - file_name = f"{to_snake_case(schema_name)}.json" - file_path = os.path.join(output_directory, file_name) - message_verb = "Writing" - if os.path.exists(file_path): - message_verb = "Overwriting" - if os.path.isdir(file_path): - _print(f"WARNING: Output file already exists as a directory. SKIPPING: {file_path}") - continue - if not force: - _print(f"Output file already exists: {file_path}") - if not yes_or_no(f"Overwrite this file?"): - continue - if verbose: - _print(f"{message_verb} {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" - f" {len(schema_data)}) file: {file_path}") - with io.open(file_path, "w") as f: - json.dump(schema_data, f, indent=4) + write_insert_files(response) +# output_directory = insert_files if isinstance(insert_files, str) else os.getcwd() +# for schema_name in response: +# schema_data = response[schema_name] +# file_name = f"{to_snake_case(schema_name)}.json" +# file_path = os.path.join(output_directory, file_name) +# message_verb = "Writing" +# if os.path.exists(file_path): +# message_verb = "Overwriting" +# if os.path.isdir(file_path): +# _print(f"WARNING: Output file already exists as a directory. SKIPPING: {file_path}") +# continue +# if not force: +# _print(f"Output file already exists: {file_path}") +# if not yes_or_no(f"Overwrite this file?"): +# continue +# if verbose: +# _print(f"{message_verb} {schema_name} (object{'s' if len(schema_data) != 1 else ''}:" +# f" {len(schema_data)}) file: {file_path}") +# with io.open(file_path, "w") as f: +# json.dump(schema_data, f, indent=4) elif raw: response.pop("schema_version", None) return response diff --git a/pyproject.toml b/pyproject.toml index 81c9f20d5..1ac2cc045 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b28" # TODO: To become 8.14.1 +version = "8.14.0.1b29" # TODO: To become 8.14.1 description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT" From 754e1d17c64aa6f6978b210ab1add9919a1a8c77 Mon Sep 17 00:00:00 2001 From: David Michaels Date: Tue, 20 Aug 2024 13:33:42 -0400 Subject: [PATCH 36/36] update version 8.14.1 - ready to merge pr-314 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1ac2cc045..bfe446d56 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicutils" -version = "8.14.0.1b29" # TODO: To become 8.14.1 +version = "8.14.1" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" authors = ["4DN-DCIC Team "] license = "MIT"