From dd16a9719c10a116b4d30c6e5b749121bd3d08f9 Mon Sep 17 00:00:00 2001 From: aschroed Date: Tue, 22 Aug 2023 16:51:10 -0400 Subject: [PATCH 1/3] update to use dcicutils schema functions --- CHANGELOG.rst | 9 +++++++ functions/notebook_functions.py | 27 +++---------------- .../03_find_and_transfer_env.ipynb | 2 +- .../04_microscopy_template_submit4dn.ipynb | 2 +- .../04_omics_template_submit4dn.ipynb | 2 +- ...items_add_to_excel_or_create_inserts.ipynb | 4 +-- pyproject.toml | 4 +-- 7 files changed, 19 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c5b5fcd..47d5af5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,15 @@ dcicwrangling Change Log ---------- + +2.3.0 +===== + +* update to use get_schemas function from dcicutils rather than having a 'broken' redundant copy +* same for dump_json_data function +* updated notebooks to call dcicutils versions of functions +* simplify get_schemas_names_and_fields to use dcicutil function + 2.2.1 ===== diff --git a/functions/notebook_functions.py b/functions/notebook_functions.py index e7f9a46..648c85b 100644 --- a/functions/notebook_functions.py +++ b/functions/notebook_functions.py @@ -1,6 +1,7 @@ from dcicutils import ff_utils from uuid import UUID import os +import sys import json import openpyxl import warnings # to suppress openpxl warning about headers @@ -310,26 +311,13 @@ def find_uuids(val): return vals -def get_schema_names(con_key): - schema_name = {} - profiles = ff_utils.get_metadata('/profiles/', key=con_key, add_on='frame=raw') - for key, value in profiles.items(): - try: - schema_name[key] = value['id'].split('/')[-1][:-5] - except: - continue - return schema_name - - def get_schema_names_and_fields(con_key): '''Gets concrete item types from profiles and returns a dict of schema names, with properties and property type (including if array_linkTo)''' schemas = {} - profiles = ff_utils.get_metadata('/profiles/', key=con_key, add_on='frame=raw') + profiles = 
ff_utils.get_schemas(key=con_key, allow_abstract=False, require_id=True) for item in profiles.values(): - if item['isAbstract'] is True: - continue - schema_name = item['id'].split('/')[-1][:-5] + schema_name = item['$id'].split('/')[-1][:-5] schemas[schema_name] = {} for field, content in item['properties'].items(): field_type = content['type'] @@ -339,15 +327,6 @@ def get_schema_names_and_fields(con_key): return schemas -def dump_results_to_json(store, folder): - if not os.path.exists(folder): - os.makedirs(folder) - for a_type in store: - filename = folder + '/' + a_type + '.json' - with open(filename, 'w') as outfile: - json.dump(store[a_type], outfile, indent=4) - - def printTable(myDict, colList=None): """ Pretty print a list of dictionaries Author: Thierry Husson""" if not colList: diff --git a/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb b/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb index 6b25192..e217648 100644 --- a/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb +++ b/notebooks/useful_notebooks/03_find_and_transfer_env.ipynb @@ -26,7 +26,7 @@ "# get key from keypairs.json\n", "my_env = 'data'\n", "my_key = get_key('koray_data')\n", - "schema_name = get_schema_names(my_key) \n", + "schema_name = ff_utils.get_schema_names(my_key) \n", "print('WORKING ON', my_key['server'], '\\n')\n", "\n", "##### COLLECT ITEMS TO Release #####\n", diff --git a/notebooks/useful_notebooks/04_microscopy_template_submit4dn.ipynb b/notebooks/useful_notebooks/04_microscopy_template_submit4dn.ipynb index ae6ce17..f0a9294 100644 --- a/notebooks/useful_notebooks/04_microscopy_template_submit4dn.ipynb +++ b/notebooks/useful_notebooks/04_microscopy_template_submit4dn.ipynb @@ -30,7 +30,7 @@ "\n", "# get key from keypairs.json\n", "my_key = get_key('koray_data')\n", - "schema_name = get_schema_names(my_key) \n", + "schema_name = ff_utils.get_schema_names(my_key) \n", "\n", "# project folder\n", "my_folder = 
'/Users/koray/Desktop/wrangling/Templating/'\n", diff --git a/notebooks/useful_notebooks/04_omics_template_submit4dn.ipynb b/notebooks/useful_notebooks/04_omics_template_submit4dn.ipynb index fbec411..c8cf2ca 100644 --- a/notebooks/useful_notebooks/04_omics_template_submit4dn.ipynb +++ b/notebooks/useful_notebooks/04_omics_template_submit4dn.ipynb @@ -31,7 +31,7 @@ "\n", "# get key from keypairs.json\n", "my_key = get_key('')\n", - "schema_name = get_schema_names(my_key) \n", + "schema_name = ff_utils.get_schema_names(my_key) \n", "\n", "# template excel from submit4dn\n", "excel_file = '/Users/user/Desktop/Templating/MetadataSheets.xls'\n", diff --git a/notebooks/useful_notebooks/07_find_items_add_to_excel_or_create_inserts.ipynb b/notebooks/useful_notebooks/07_find_items_add_to_excel_or_create_inserts.ipynb index 05be9f8..304deda 100644 --- a/notebooks/useful_notebooks/07_find_items_add_to_excel_or_create_inserts.ipynb +++ b/notebooks/useful_notebooks/07_find_items_add_to_excel_or_create_inserts.ipynb @@ -46,11 +46,11 @@ "print(round((time2-time1), 1), 'sec for collection')\n", "\n", "if purpose == 'excel':\n", - " schema_name = get_schema_names(my_auth) \n", + " schema_name = ff_utils.get_schema_names(my_auth) \n", " append_items_to_xlsx(excel_file, store, schema_name)\n", "\n", "elif purpose == 'inserts':\n", - " dump_results_to_json(store, my_folder)" + " ff_utils.dump_results_to_json(store, my_folder)" ] } ], diff --git a/pyproject.toml b/pyproject.toml index fc5216c..8871d79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicwrangling" -version = "2.2.1" +version = "2.3.0" description = "Scripts and Jupyter notebooks for 4DN wrangling" authors = ["4DN-DCIC Team "] license = "MIT" @@ -11,7 +11,7 @@ packages = [{ include="scripts", from="." 
}] [tool.poetry.dependencies] python = ">=3.7.1, <3.9" -dcicutils = "^7.8.0" +dcicutils = "^7.9.0" openpyxl = "^3.0.9" Biopython = "1.76" GEOparse = "^2.0.1" From 77277aff066f0b1b4939f505a965431db72875f7 Mon Sep 17 00:00:00 2001 From: aschroed Date: Tue, 22 Aug 2023 16:56:23 -0400 Subject: [PATCH 2/3] relock versions --- poetry.lock | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 898cdf9..6d645e3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -252,18 +252,18 @@ css = ["tinycss2 (>=1.1.0,<1.2)"] [[package]] name = "boto3" -version = "1.28.28" +version = "1.28.32" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.28.28-py3-none-any.whl", hash = "sha256:57d618f03bd269ebef6287dd4ed86ddaa1d53a4021008ad3267c6097be17e172"}, - {file = "boto3-1.28.28.tar.gz", hash = "sha256:4a435fdbd77628e3d32cfbc8b6225e779d8f789027fadb6a51fe1b456e15ef54"}, + {file = "boto3-1.28.32-py3-none-any.whl", hash = "sha256:ed787f250ce2562c7744395bdf32b5a7bc9184126ef50a75e97bcb66043dccf3"}, + {file = "boto3-1.28.32.tar.gz", hash = "sha256:b505faa126db84e226f6f8d242a798fae30a725f0cac8a76c6aca9ace4e8eb28"}, ] [package.dependencies] -botocore = ">=1.31.28,<1.32.0" +botocore = ">=1.31.32,<1.32.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -272,14 +272,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.31.28" +version = "1.31.32" description = "Low-level, data-driven core of boto 3." 
category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.31.28-py3-none-any.whl", hash = "sha256:d6310826e37ba0209e904d691638b8e848342ec17f5187568ca02ad092c55c45"}, - {file = "botocore-1.31.28.tar.gz", hash = "sha256:1fcfbd23c7f1f66f16c5c1a1e8565ee8ff68429cc0ee9d2acfb1b55739584cbd"}, + {file = "botocore-1.31.32-py3-none-any.whl", hash = "sha256:8992ac186988c4b4cc168e8e479e9472da1442b193c1bf7c9dcd1877ec62d23c"}, + {file = "botocore-1.31.32.tar.gz", hash = "sha256:7a07d8dc8cc47bf23af39409ada81f388eb78233e1bb2cde0c415756da753664"}, ] [package.dependencies] @@ -600,14 +600,14 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "dcicutils" -version = "7.8.0" +version = "7.9.0" description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" category = "main" optional = false python-versions = ">=3.7,<3.10" files = [ - {file = "dcicutils-7.8.0-py3-none-any.whl", hash = "sha256:d20b9c1edc01230ea6b979295c368d5be8b75b96032c32ffcfd1956847411de6"}, - {file = "dcicutils-7.8.0.tar.gz", hash = "sha256:069a34e5e22ae703a1e5a7c17c67f90e75e50af23c4be79fbf43ab617a27418d"}, + {file = "dcicutils-7.9.0-py3-none-any.whl", hash = "sha256:e15d7e6d9551e5a732abb7c84731df9f70c5011b0e19b8f4ce024a2ff1a56ad7"}, + {file = "dcicutils-7.9.0.tar.gz", hash = "sha256:0126f46f60c873c498f67bf137d585093f30e1373b0ea5d052a12920106e2395"}, ] [package.dependencies] @@ -1511,14 +1511,14 @@ et-xmlfile = "*" [[package]] name = "opensearch-py" -version = "2.3.0" +version = "2.3.1" description = "Python client for OpenSearch" category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" files = [ - {file = "opensearch-py-2.3.0.tar.gz", hash = "sha256:493b224d0f865f62663f689c3a7662b1edccf9ee6c7643f2561cafda04e3a66a"}, - {file = "opensearch_py-2.3.0-py2.py3-none-any.whl", hash = "sha256:4093d0bbe575979d67cfd74a153eb91d82e354e232d76c6980876321c73056c1"}, + {file = "opensearch-py-2.3.1.tar.gz", hash = 
"sha256:f82a2e914835f7d645a632777de9a62d0c0de60ffd2f8cdae2ccfa4cfc40a185"}, + {file = "opensearch_py-2.3.1-py2.py3-none-any.whl", hash = "sha256:eafbc5d56a7ca696afba7d77bcda1bbb849050cbf9265d57d8476576cb576395"}, ] [package.dependencies] @@ -2567,4 +2567,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.7.1, <3.9" -content-hash = "ea127b8235772d06ead774dbda0bb9939b9fe935fff1175275536dc97b47f46e" +content-hash = "36b12decd9283e015963cb6bb219d83806101b349a9eb996b0df3e200daa1858" From 9ec69b77ea043c061269910f3bad91730e0fa2b3 Mon Sep 17 00:00:00 2001 From: aschroed Date: Wed, 23 Aug 2023 09:43:47 -0400 Subject: [PATCH 3/3] add PR to change log --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 47d5af5..e885756 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,8 @@ Change Log 2.3.0 ===== +`PR:106 update get_schemas calls <https://github.com/4dn-dcic/dcicwrangling/pull/106>`_ + * update to use get_schemas function from dcicutils rather than having a 'broken' redundant copy * same for dump_json_data function * updated notebooks to call dcicutils versions of functions