Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing improvements for snovault and indirectly for cgap #241

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ configure: # does any pre-requisite installs
@#pip install --upgrade pip==21.0.1
pip install --upgrade pip
@#pip install poetry==1.1.9 # this version is known to work. -kmp 5-Oct-2021
pip install wheel
pip install poetry

macpoetry-install:
Expand All @@ -29,6 +30,13 @@ test:
@git log -1 --decorate | head -1
@date

# Like 'test' but runs the whole pytest suite verbosely with a 200s per-test
# timeout, logging the current commit and timestamp before and after the run.
test-full:
	@git log -1 --decorate | head -1
	@date
	pytest -vv --timeout=200
	@git log -1 --decorate | head -1
	@date

remote-test-npm:
poetry run pytest -xvvv --timeout=400 --durations=100 --aws-auth --es search-fourfront-testing-opensearch-kqm7pliix4wgiu4druk2indorq.us-east-1.es.amazonaws.com:443 -m "indexing"

Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dcicsnovault"
version = "7.1.1"
version = "7.1.3.2b2" # to become "7.1.2"
description = "Storage support for 4DN Data Portals."
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down Expand Up @@ -31,7 +31,6 @@ classifiers = [
# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
]

Expand Down
8 changes: 4 additions & 4 deletions snovault/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import time
import tempfile
# import os
# import time
# import tempfile
import pytest
import logging
import subprocess
# import subprocess

from ..elasticsearch.indexer_queue import QueueManager

Expand Down
4 changes: 3 additions & 1 deletion snovault/tests/test_embedding.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
import time
# import time

from dcicutils.misc_utils import ignored
from dcicutils.qa_utils import notice_pytest_fixtures, Retry, Eventually
Expand Down Expand Up @@ -112,9 +112,11 @@ def test_linked_uuids_object(content, dummy_request, threadlocals):
# needed to track _linked_uuids
dummy_request._indexing_view = True
dummy_request.embed('/testing-link-sources-sno/', sources[0]['uuid'], '@@object')

def my_assertions():
assert dummy_request._linked_uuids == {('16157204-8c8f-4672-a1a4-14f4b8021fcd', 'TestingLinkSourceSno')}
assert dummy_request._rev_linked_uuids_by_item == {}

Eventually.call_assertion(my_assertions)


Expand Down
2 changes: 1 addition & 1 deletion snovault/tests/test_storage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest import mock
import pytest
import re
import transaction as transaction_management
# import transaction as transaction_management
import uuid
import boto3

Expand Down
75 changes: 75 additions & 0 deletions snovault/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import contextlib
import webtest

from .interfaces import DBSESSION
from .elasticsearch import create_mapping


def make_testapp(app, *, http_accept=None, remote_user=None):
    """
    Build a webtest.TestApp wrapping the given Pyramid app.

    The resulting testapp's environ defaults to
    {"HTTP_ACCEPT": "application/json", "REMOTE_USER": "TEST"};
    either entry can be overridden via the keyword arguments,
    and an explicit None accepts the corresponding default.

    :param app: a Pyramid app object.
    :param http_accept: The value of HTTP_ACCEPT in the testapp's environ, or None accepting 'application/json'.
    :param remote_user: The value of REMOTE_USER in the testapp's environ, or None accepting 'TEST'.
    """
    accept_header = http_accept if http_accept else 'application/json'
    user = remote_user if remote_user else 'TEST'
    return webtest.TestApp(app, {'HTTP_ACCEPT': accept_header, 'REMOTE_USER': user})


def make_htmltestapp(app):
    """Returns a testapp for app whose environ asks for text/html instead of JSON (REMOTE_USER is still 'TEST')."""
    return make_testapp(app, http_accept='text/html')


def make_authenticated_testapp(app):
    """Returns a JSON testapp for app acting as REMOTE_USER 'TEST_AUTHENTICATED'."""
    return make_testapp(app, remote_user='TEST_AUTHENTICATED')


def make_submitter_testapp(app):
    """Returns a JSON testapp for app acting as REMOTE_USER 'TEST_SUBMITTER'."""
    return make_testapp(app, remote_user='TEST_SUBMITTER')


def make_indexer_testapp(app):
    """Returns a JSON testapp for app acting as REMOTE_USER 'INDEXER'."""
    return make_testapp(app, remote_user='INDEXER')


def make_embed_testapp(app):
    """Returns a JSON testapp for app acting as REMOTE_USER 'EMBED'."""
    return make_testapp(app, remote_user='EMBED')


class NoNestedCommit(BaseException):
    """
    Control-flow marker raised (and caught) by begin_nested to abort a
    savepoint instead of committing it; it is not an error condition.

    It derives directly from BaseException rather than Exception —
    presumably so that blanket 'except Exception' handlers in the nested
    body cannot swallow it (TODO confirm intent).
    """


@contextlib.contextmanager
def begin_nested(*, app, commit=True):
    """
    Context manager that runs its body inside a nested transaction
    (savepoint) on the app's DB session connection.

    :param app: a Pyramid app object whose registry provides DBSESSION.
    :param commit: if False, the savepoint is abandoned (via NoNestedCommit)
        rather than committed when the body finishes without error.
    """
    session = app.registry[DBSESSION]
    # NOTE(review): calling .connect() on the session's existing connection looks
    # like it produces a branched connection -- confirm that is intended rather
    # than simply using session.connection() directly.
    connection = session.connection().connect()
    try:
        with connection.begin_nested():
            yield
            if not commit:
                raise NoNestedCommit()  # Raising an error will bypass an attempt to commit
    except NoNestedCommit:
        # Expected control-flow exit for commit=False; the savepoint was rolled back.
        pass


@contextlib.contextmanager
def local_collections(*, app, collections):
    """
    Context manager that creates ES mappings for the given collections inside
    a nested transaction that is never committed (commit=False), so any DB
    effects are discarded when the body exits.

    :param app: a Pyramid app object.
    :param collections: the collections to pass to create_mapping.run.
    """
    with begin_nested(app=app, commit=False):
        # skip_indexing/purge_queue keep this to mapping setup only; the
        # enclosing begin_nested(commit=False) rolls back on exit.
        create_mapping.run(app, collections=collections, skip_indexing=True, purge_queue=True)
        yield
15 changes: 13 additions & 2 deletions snovault/upgrader.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
import venusian

from .interfaces import TYPES, UPGRADER
from pkg_resources import parse_version
from pkg_resources import parse_version as raw_parse_version
from pyramid.interfaces import PHASE1_CONFIG
from .interfaces import PHASE2_5_CONFIG


def parse_version(version_string):
    """
    Parse a version string, treating an empty or falsy value as version '0'.

    In older versions of pkg_resources.parse_version, '' yielded
    LegacyVersion(''), an object comparing less than every other version.
    Negative versions are not used here, so '0' serves as that lower bound.
    """
    effective_version = version_string if version_string else '0'
    return raw_parse_version(effective_version)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fussed a bit about whether to default this to '-9999', '0', or '1', but I think we never use negative versions and '0' is therefore the lower bound. And I wanted a way to distinguish between this default and the earliest version I thought was likely to be specified explicitly, which is probably '1'.



NO_VERSION = parse_version('')


def includeme(config):
config.include('.typeinfo')
config.registry[UPGRADER] = Upgrader(config.registry)
Expand Down Expand Up @@ -96,7 +107,7 @@ def upgrade(self, value, current_version='1', target_version=None, **kw):
# If no entry exists for the current_version, fallback to ''
if parse_version(version) not in self.upgrade_steps:
try:
step = self.upgrade_steps[parse_version('')]
step = self.upgrade_steps[NO_VERSION]
except KeyError:
pass
else:
Expand Down
32 changes: 21 additions & 11 deletions snovault/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
import functools
import json
import os
import structlog
import sys

from copy import copy
from datetime import datetime, timedelta

import structlog
from pyramid.httpexceptions import HTTPForbidden
from pyramid.threadlocal import manager as threadlocal_manager

from .interfaces import CONNECTION, STORAGE, TYPES
from .settings import Settings


log = structlog.getLogger(__name__)


Expand All @@ -39,9 +40,9 @@
REFRESH_INTERVAL = '1s'

# Schema keys to ignore when finding embeds
SCHEMA_KEYS_TO_IGNORE_FOR_EMBEDS = set([
SCHEMA_KEYS_TO_IGNORE_FOR_EMBEDS = {
"items", "properties", "additionalProperties", "patternProperties"
])
}


class IndexSettings:
Expand Down Expand Up @@ -720,7 +721,7 @@ def crawl_schemas_by_embeds(item_type, types, split_path, schema):
element = split_path[idx]
# schema_cursor should always be a dictionary if we have more split_fields
if not isinstance(schema_cursor, dict):
error_message = '{} has a bad embed: {} does not have valid schemas throughout.'.format(item_type, linkTo_path)
error_message = f'{item_type} has a bad embed: {linkTo_path} does not have valid schemas throughout.'
return error_message, embeds_to_add
if element == '*':
linkTo_path = '.'.join(split_path[:-1])
Expand Down Expand Up @@ -875,11 +876,19 @@ def recursively_process_field(item, split_fields):
###########################


def _sid_cache(request):
return request._sid_cache # noQA. Centrally ignore that it's an access to a protected member.


def _sid_cache_update(request, new_value):
request._sid_cache.update(new_value) # noQA. Centrally ignore that it's an access to a protected member.


def check_es_and_cache_linked_sids(context, request, view='embedded'):
"""
For the given context and request, see if the desired item is present in
Elasticsearch and, if so, retrieve it cache all sids of the linked objects
that correspond to the given view. Store these in request._sid_cacheself.
that correspond to the given view. Store these in request's sid cache.

Args:
context: current Item
Expand All @@ -896,9 +905,9 @@ def check_es_and_cache_linked_sids(context, request, view='embedded'):
es_links_field = 'linked_uuids_object' if view == 'object' else 'linked_uuids_embedded'
if es_res and es_res.get(es_links_field):
linked_uuids = [link['uuid'] for link in es_res[es_links_field]
if link['uuid'] not in request._sid_cache]
if link['uuid'] not in _sid_cache(request)]
to_cache = request.registry[STORAGE].write.get_sids_by_uuids(linked_uuids)
request._sid_cache.update(to_cache)
_sid_cache_update(request, to_cache)
return es_res
return None

Expand Down Expand Up @@ -938,11 +947,12 @@ def validate_es_content(context, request, es_res, view='embedded'):
return False
for linked in linked_es_sids:
# infrequently, may need to add sids from the db to the _sid_cache
if linked['uuid'] not in request._sid_cache:
cached = _sid_cache(request)
found_sid = cached.get(linked['uuid'])
if not found_sid:
db_res = request.registry[STORAGE].write.get_by_uuid(linked['uuid'])
if db_res:
request._sid_cache[linked['uuid']] = db_res.sid
found_sid = request._sid_cache.get(linked['uuid'])
cached[linked['uuid']] = found_sid = db_res.sid
Comment on lines +950 to +955
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the logic here so please double-check carefully, but I think this way of expressing what found_sid is will be easier to read.

if found_sid is None or linked['sid'] < found_sid:
use_es_result = False
break
Expand Down