From cae111171ee368e1e590c9a9f26c2df56f3c1ddc Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 04:50:19 -0500 Subject: [PATCH 1/6] Add 'make test-full'. Install 'wheel' in 'make configure'. --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index 41d1dc03b..4500674f7 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ configure: # does any pre-requisite installs @#pip install --upgrade pip==21.0.1 pip install --upgrade pip @#pip install poetry==1.1.9 # this version is known to work. -kmp 5-Oct-2021 + pip install wheel pip install poetry macpoetry-install: @@ -29,6 +30,13 @@ test: @git log -1 --decorate | head -1 @date +test-full: + @git log -1 --decorate | head -1 + @date + pytest -vv --timeout=200 + @git log -1 --decorate | head -1 + @date + remote-test-npm: poetry run pytest -xvvv --timeout=400 --durations=100 --aws-auth --es search-fourfront-testing-opensearch-kqm7pliix4wgiu4druk2indorq.us-east-1.es.amazonaws.com:443 -m "indexing" From 7efeeca6093720ba10ec95328d6a4263a3cc17fc Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 04:51:13 -0500 Subject: [PATCH 2/6] In pyproject: Make a beta version. Remove classifier for Python 3.7. --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 643558b2e..ea463d68d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dcicsnovault" -version = "7.1.1" +version = "7.1.3.2b2" # to become "7.1.2" description = "Storage support for 4DN Data Portals." authors = ["4DN-DCIC Team "] license = "MIT" @@ -31,7 +31,6 @@ classifiers = [ # Specify the Python versions you support here. In particular, ensure # that you indicate whether you support Python 2, Python 3 or both. 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ] From 53c540214dc6052cfdc6f54d468c779329d3cb46 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 04:52:13 -0500 Subject: [PATCH 3/6] Add functions useful to cgap in snovault/tools.py. --- snovault/tools.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 snovault/tools.py diff --git a/snovault/tools.py b/snovault/tools.py new file mode 100644 index 000000000..72ea97d5c --- /dev/null +++ b/snovault/tools.py @@ -0,0 +1,75 @@ +import contextlib +import webtest + +from .interfaces import DBSESSION +from .elasticsearch import create_mapping + + +def make_testapp(app, *, http_accept=None, remote_user=None): + """ + By default, makes a testapp with environ={"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"}. + Arguments can be used to override these defaults. An explicit None accepts the default. + + :param app: a Pyramid app object. + :param http_accept: The value of HTTP_ACCEPT in the testapp's environ, or None accepting 'application/json'. + :param remote_user: The value of REMOTE_USER in the testapp's environ, or None accepting 'TEST'. + """ + environ = { + 'HTTP_ACCEPT': http_accept or 'application/json', + 'REMOTE_USER': remote_user or 'TEST', + } + testapp = webtest.TestApp(app, environ) + return testapp + + +def make_htmltestapp(app): + """Makes a testapp with environ={"HTTP_ACCEPT": "text/html", "REMOTE_USER": "TEST"}""" + return make_testapp(app, http_accept='text/html') + + +def make_authenticated_testapp(app): + """Makes a testapp with environ={"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST_AUTHENTICATED"}""" + return make_testapp(app, remote_user='TEST_AUTHENTICATED') + + +def make_submitter_testapp(app): + """Makes a testapp with environ={"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST_SUBMITTER"}""" + return make_testapp(app, remote_user='TEST_SUBMITTER') + + +def make_indexer_testapp(app): + """Makes a testapp with environ={"HTTP_ACCEPT": "application/json", "REMOTE_USER": "INDEXER"}""" + return make_testapp(app, remote_user='INDEXER') + + +def make_embed_testapp(app): + """Makes a testapp with environ={"HTTP_ACCEPT": "application/json", "REMOTE_USER": "EMBED"}""" + return make_testapp(app, remote_user='EMBED') + + +class NoNestedCommit(BaseException): + """ + This is a pseudo-error class to be used as a special control construct + only for the purpose of implementing begin_nested. + """ + pass + + +@contextlib.contextmanager +def begin_nested(*, app, commit=True): + session = app.registry[DBSESSION] + connection = session.connection().connect() + try: + with connection.begin_nested(): + yield + if not commit: + raise NoNestedCommit() # Raising an error will bypass an attempt to commit + except NoNestedCommit: + pass + + +@contextlib.contextmanager +def local_collections(*, app, collections): + with begin_nested(app=app, commit=False): + create_mapping.run(app, collections=collections, skip_indexing=True, purge_queue=True) + yield From 02c894bcf99356c2c987c79abb119fdcae8cf8e6 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 04:53:04 -0500 Subject: [PATCH 4/6] Various PEP8-driven changes in snovault/util.py --- snovault/util.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/snovault/util.py b/snovault/util.py index a9fafa17f..12a20efc8 100644 --- a/snovault/util.py +++ b/snovault/util.py @@ -3,17 +3,18 @@ import functools import json import os +import structlog import sys + from copy import copy from datetime import datetime, timedelta - -import structlog from pyramid.httpexceptions import HTTPForbidden from pyramid.threadlocal import manager as threadlocal_manager from .interfaces import CONNECTION, STORAGE, TYPES from .settings import Settings + log = structlog.getLogger(__name__) @@ -39,9 +40,9 @@ REFRESH_INTERVAL = '1s' # Schema keys to ignore when finding embeds -SCHEMA_KEYS_TO_IGNORE_FOR_EMBEDS = set([ +SCHEMA_KEYS_TO_IGNORE_FOR_EMBEDS = { "items", "properties", "additionalProperties", "patternProperties" -]) +} class IndexSettings: @@ -720,7 +721,7 @@ def crawl_schemas_by_embeds(item_type, types, split_path, schema): element = split_path[idx] # schema_cursor should always be a dictionary if we have more split_fields if not isinstance(schema_cursor, dict): - error_message = '{} has a bad embed: {} does not have valid schemas throughout.'.format(item_type, linkTo_path) + error_message = f'{item_type} has a bad embed: {linkTo_path} does not have valid schemas throughout.' return error_message, embeds_to_add if element == '*': linkTo_path = '.'.join(split_path[:-1]) @@ -875,11 +876,19 @@ def recursively_process_field(item, split_fields): ########################### +def _sid_cache(request): + return request._sid_cache # noQA. Centrally ignore that it's an access to a protected member. + + +def _sid_cache_update(request, new_value): + request._sid_cache.update(new_value) # noQA. Centrally ignore that it's an access to a protected member. + + def check_es_and_cache_linked_sids(context, request, view='embedded'): """ For the given context and request, see if the desired item is present in Elasticsearch and, if so, retrieve it cache all sids of the linked objects - that correspond to the given view. Store these in request._sid_cacheself. + that correspond to the given view. Store these in request's sid cache. Args: context: current Item @@ -896,9 +905,9 @@ def check_es_and_cache_linked_sids(context, request, view='embedded'): es_links_field = 'linked_uuids_object' if view == 'object' else 'linked_uuids_embedded' if es_res and es_res.get(es_links_field): linked_uuids = [link['uuid'] for link in es_res[es_links_field] - if link['uuid'] not in request._sid_cache] + if link['uuid'] not in _sid_cache(request)] to_cache = request.registry[STORAGE].write.get_sids_by_uuids(linked_uuids) - request._sid_cache.update(to_cache) + _sid_cache_update(request, to_cache) return es_res return None @@ -938,11 +947,12 @@ def validate_es_content(context, request, es_res, view='embedded'): return False for linked in linked_es_sids: # infrequently, may need to add sids from the db to the _sid_cache - if linked['uuid'] not in request._sid_cache: + cached = _sid_cache(request) + found_sid = cached.get(linked['uuid']) + if not found_sid: db_res = request.registry[STORAGE].write.get_by_uuid(linked['uuid']) if db_res: - request._sid_cache[linked['uuid']] = db_res.sid - found_sid = request._sid_cache.get(linked['uuid']) + cached[linked['uuid']] = found_sid = db_res.sid if found_sid is None or linked['sid'] < found_sid: use_es_result = False break From 9b054eaeeff68504ce6e633f9f7aacf2e8e8a791 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 05:50:46 -0500 Subject: [PATCH 5/6] PEP8 changes --- snovault/tests/conftest.py | 8 ++++---- snovault/tests/test_embedding.py | 4 +++- snovault/tests/test_storage.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/snovault/tests/conftest.py b/snovault/tests/conftest.py index 970f40389..c1e95c840 100644 --- a/snovault/tests/conftest.py +++ b/snovault/tests/conftest.py @@ -1,9 +1,9 @@ -import os -import time -import tempfile +# import os +# import time +# import tempfile import pytest import logging -import subprocess +# import subprocess from ..elasticsearch.indexer_queue import QueueManager diff --git a/snovault/tests/test_embedding.py b/snovault/tests/test_embedding.py index 480365be1..3c1a5a0cc 100644 --- a/snovault/tests/test_embedding.py +++ b/snovault/tests/test_embedding.py @@ -1,5 +1,5 @@ import pytest -import time +# import time from dcicutils.misc_utils import ignored from dcicutils.qa_utils import notice_pytest_fixtures, Retry, Eventually @@ -112,9 +112,11 @@ def test_linked_uuids_object(content, dummy_request, threadlocals): # needed to track _linked_uuids dummy_request._indexing_view = True dummy_request.embed('/testing-link-sources-sno/', sources[0]['uuid'], '@@object') + def my_assertions(): assert dummy_request._linked_uuids == {('16157204-8c8f-4672-a1a4-14f4b8021fcd', 'TestingLinkSourceSno')} assert dummy_request._rev_linked_uuids_by_item == {} + Eventually.call_assertion(my_assertions) diff --git a/snovault/tests/test_storage.py b/snovault/tests/test_storage.py index a78eca505..290381af1 100644 --- a/snovault/tests/test_storage.py +++ b/snovault/tests/test_storage.py @@ -1,7 +1,7 @@ from unittest import mock import pytest import re -import transaction as transaction_management +# import transaction as transaction_management import uuid import boto3 From 3f3a299ecceb366911313bca062cccf74165a135 Mon Sep 17 00:00:00 2001 From: Kent Pitman Date: Wed, 1 Feb 2023 06:10:15 -0500 Subject: [PATCH 6/6] Experimental workaround to a future problem in pkg_resources.parse_version --- snovault/upgrader.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/snovault/upgrader.py b/snovault/upgrader.py index 72bb947a6..5b9916139 100644 --- a/snovault/upgrader.py +++ b/snovault/upgrader.py @@ -1,11 +1,22 @@ import venusian from .interfaces import TYPES, UPGRADER -from pkg_resources import parse_version +from pkg_resources import parse_version as raw_parse_version from pyramid.interfaces import PHASE1_CONFIG from .interfaces import PHASE2_5_CONFIG +def parse_version(version_string): + # In older versions of pkg_resources.parse_version, passing '' would return LegacyVersion(''), + # which was an object that was always less than any other version than itself. + # We don't care about negative versions, so let's just say that parse_version + # for our purposes can return version 0. + return raw_parse_version(version_string or '0') + + +NO_VERSION = parse_version('') + + def includeme(config): config.include('.typeinfo') config.registry[UPGRADER] = Upgrader(config.registry) @@ -96,7 +107,7 @@ def upgrade(self, value, current_version='1', target_version=None, **kw): # If no entry exists for the current_version, fallback to '' if parse_version(version) not in self.upgrade_steps: try: - step = self.upgrade_steps[parse_version('')] + step = self.upgrade_steps[NO_VERSION] except KeyError: pass else: