diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 131f751af..012369891 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,6 +29,7 @@ jobs: env: POETRY_VERSION: 1.2.0rc1 strategy: + fail-fast: false matrix: python-version: ["3.7", "3.8", "3.9", "3.10"] @@ -108,7 +109,7 @@ jobs: run: > source .venv/bin/activate; pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/ - -m "not google_credentials_needed" + -m "not (google_credentials_needed or table_operations)" - name: Run tests env: @@ -118,7 +119,7 @@ jobs: run: > source .venv/bin/activate; pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/ - -m "not (google_credentials_needed or rule_combos or schematic_api)" + -m "not (google_credentials_needed or rule_combos or schematic_api or table_operations)" - name: Upload pytest test results uses: actions/upload-artifact@v2 diff --git a/pyproject.toml b/pyproject.toml index 9ca4d70e1..b3289d2c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,5 +118,15 @@ markers = [ """\ schematic_api: marks tests requiring \ running API locally (skipped on GitHub CI) - """ + """, + """\ + rule_combos: marks tests covering \ + combinations of rules that aren't always necessary \ + and can add significantly to CI runtime (skipped on GitHub CI unless prompted to run in commit message) + """, + """\ + table_operations: marks tests covering \ + table operations that pass locally \ + but fail on CI due to interactions with Synapse (skipped on GitHub CI) + """ ] diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 1673cfdde..86603932d 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -9,6 +9,7 @@ # allows specifying explicit variable types from typing import Dict, List, Tuple, Sequence, Union from collections import OrderedDict +from tenacity import retry, stop_after_attempt, wait_chain, wait_fixed, retry_if_exception_type import numpy as np import pandas as pd @@ -93,6 +94,9 @@ def __init__( except KeyError: raise MissingConfigValueError(("synapse", "manifest_basename")) + self._query_fileview() + + def _query_fileview(self): try: self.storageFileview = CONFIG["synapse"]["master_fileview"] self.manifest = CONFIG["synapse"]["manifest_basename"] @@ -108,7 +112,7 @@ def __init__( except AttributeError: raise AttributeError("storageFileview attribute has not been set.") except SynapseHTTPError: - raise AccessCredentialsError(self.storageFileview) + raise AccessCredentialsError(self.storageFileview) @staticmethod def login(token=None, access_token=None, input_token=None): @@ -725,16 +729,21 @@ def get_synapse_table(self, synapse_id: str) -> Tuple[pd.DataFrame, CsvFileTable return df, results - def _get_tables(self, datasetId: str = None) -> List[Table]: - project = self.syn.get(self.getDatasetProject(datasetId)) + def _get_tables(self, datasetId: str = None, projectId: str = None) -> List[Table]: + if projectId: + project = projectId + elif datasetId: + project = self.syn.get(self.getDatasetProject(datasetId)) + return list(self.syn.getChildren(project, includeTypes=["table"])) - def get_table_info(self, datasetId: str = None) -> List[str]: + def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[str]: """Gets the names of the tables in the schema + Can pass in a synID for a dataset or project Returns: list[str]: A list of table names """ - tables = self._get_tables(datasetId) + tables = self._get_tables(datasetId = datasetId, projectId = projectId) if tables: return {table["name"]: table["id"] for table in tables} else: @@ -743,7 +752,7 @@ def get_table_info(self, datasetId: str = None) -> List[str]: @missing_entity_handler def upload_format_manifest_table(self, se, manifest, datasetId, table_name, restrict, useSchemaLabel): # Rename the manifest columns to display names to match fileview - table_info = self.get_table_info(datasetId) + table_info = self.get_table_info(datasetId = datasetId) blacklist_chars = ['(', ')', '.', ' ', '-'] manifest_columns = manifest.columns.tolist() @@ -1293,6 +1302,15 @@ def getDatasetAnnotations( # Force all values as strings return table.astype(str) + def raise_final_error(retry_state): + return retry_state.outcome.result() + + @retry(stop = stop_after_attempt(5), + wait = wait_chain(*[wait_fixed(10) for i in range (2)] + + [wait_fixed(15) for i in range(2)] + + [wait_fixed(20)]), + retry=retry_if_exception_type(LookupError), + retry_error_callback = raise_final_error) def getDatasetProject(self, datasetId: str) -> str: """Get parent project for a given dataset ID. @@ -1306,6 +1324,9 @@ def getDatasetProject(self, datasetId: str) -> str: Returns: str: The Synapse ID for the parent project. """ + + self._query_fileview() + # Subset main file view dataset_index = self.storageFileviewTable["id"] == datasetId dataset_row = self.storageFileviewTable[dataset_index] diff --git a/tests/conftest.py b/tests/conftest.py index 0c0d1c716..7af6eb9a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ from multiprocessing.sharedctypes import Value import os import logging -import platform +import sys import shutil import pytest @@ -66,10 +66,25 @@ def get_schema_explorer(path=None, *paths): @staticmethod def get_python_version(self): - version=platform.python_version() + version=sys.version base_version=".".join(version.split('.')[0:2]) return base_version + + @staticmethod + def get_python_project(self): + + version = self.get_python_version(Helpers) + + python_projects = { + "3.7": "syn47217926", + "3.8": "syn47217967", + "3.9": "syn47218127", + "3.10": "syn47218347", + } + + return python_projects[version] + @pytest.fixture(scope="session") def helpers(): yield Helpers diff --git a/tests/data/mock_manifests/table_manifest.csv b/tests/data/mock_manifests/table_manifest.csv new file mode 100644 index 000000000..c64988577 --- /dev/null +++ b/tests/data/mock_manifests/table_manifest.csv @@ -0,0 +1,10 @@ +Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462.0 +FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454.0 diff --git a/tests/data/mock_manifests/table_manifest_replacement.csv b/tests/data/mock_manifests/table_manifest_replacement.csv new file mode 100644 index 000000000..71fd13e30 --- /dev/null +++ b/tests/data/mock_manifests/table_manifest_replacement.csv @@ -0,0 +1,10 @@ +Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID,Uuid +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455,318e2d8c-83be-4b19-9a9f-5e97ce9c9cf7 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456,600666ee-1f96-49f8-b848-2be751cb59f9 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457,1fb32068-7b6c-4d12-8031-96d91229dda0 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458,964949f3-efe1-47cd-8113-dedb909fd312 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459,c562590f-359a-4ff5-bbd7-47e7ad840a32 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460,a77e0882-7b6f-42a9-9e60-c4d2bfd72bb7 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461,e7705ed5-ade4-4451-ae26-9c790e35e878 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462,2897b73c-e225-4dc3-a093-96f645332ea3 +FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454,26a1cdba-dd37-483a-be6e-d1a4f5a9521b diff --git a/tests/test_store.py b/tests/test_store.py index b33d8e1e3..7ecbdb33c 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -3,12 +3,13 @@ import math import logging import pytest -import time +from time import sleep from tenacity import Retrying, RetryError, stop_after_attempt, wait_random_exponential import pandas as pd -from synapseclient import EntityViewSchema +from synapseclient import EntityViewSchema, Folder +from schematic.models.metadata import MetadataModel from schematic.store.base import BaseStorage from schematic.store.synapse import SynapseStorage, DatasetFileView from schematic.utils.cli_utils import get_from_config @@ -47,6 +48,28 @@ def dataset_fileview_table_tidy(dataset_fileview, dataset_fileview_table): table = dataset_fileview.tidy_table() yield table +@pytest.fixture +def version(synapse_store, helpers): + + yield helpers.get_python_version(helpers) + +@pytest.fixture +def projectId(synapse_store, helpers): + projectId = helpers.get_python_project(helpers) + yield projectId + +@pytest.fixture +def datasetId(synapse_store, projectId, helpers): + dataset = Folder( + name = 'Table Test Dataset ' + helpers.get_python_version(helpers), + parent = projectId, + ) + + datasetId = synapse_store.syn.store(dataset).id + sleep(5) + yield datasetId + synapse_store.syn.delete(datasetId) + def raise_final_error(retry_state): return retry_state.outcome.result() @@ -243,3 +266,105 @@ def test_tidy_table(self, dataset_fileview_table_tidy): year_value = table.loc[sample_a_row, "YearofBirth"][0] assert isinstance(year_value, str) assert year_value == "1980" + +@pytest.mark.table_operations +class TestTableOperations: + + def test_createTable(self, helpers, synapse_store, config, projectId, datasetId): + + + # Check if FollowUp table exists if so delete + existing_tables = synapse_store.get_table_info(projectId = projectId) + + table_name='followup_synapse_storage_manifest_table' + + if table_name in existing_tables.keys(): + synapse_store.syn.delete(existing_tables[table_name]) + sleep(10) + # assert no table + assert table_name not in synapse_store.get_table_info(projectId = projectId).keys() + + # associate metadata with files + manifest_path = "mock_manifests/table_manifest.csv" + inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location"))) + sg = SchemaGenerator(inputModelLocaiton) + + # updating file view on synapse takes a long time + manifestId = synapse_store.associateMetadataWithFiles( + schemaGenerator = sg, + metadataManifestPath = helpers.get_data_path(manifest_path), + datasetId = datasetId, + manifest_record_type = 'table', + useSchemaLabel = True, + hideBlanks = True, + restrict_manifest = False, + ) + existing_tables = synapse_store.get_table_info(projectId = projectId) + + # clean Up + synapse_store.syn.delete(manifestId) + # assert table exists + assert table_name in existing_tables.keys() + + def test_replaceTable(self, helpers, synapse_store, config, projectId, datasetId): + table_name='followup_synapse_storage_manifest_table' + manifest_path = "mock_manifests/table_manifest.csv" + replacement_manifest_path = "mock_manifests/table_manifest_replacement.csv" + column_of_interest="DaystoFollowUp" + + # Check if FollowUp table exists if so delete + existing_tables = synapse_store.get_table_info(projectId = projectId) + + if table_name in existing_tables.keys(): + synapse_store.syn.delete(existing_tables[table_name]) + sleep(10) + # assert no table + assert table_name not in synapse_store.get_table_info(projectId = projectId).keys() + + # associate org FollowUp metadata with files + inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location"))) + sg = SchemaGenerator(inputModelLocaiton) + + # updating file view on synapse takes a long time + manifestId = synapse_store.associateMetadataWithFiles( + schemaGenerator = sg, + metadataManifestPath = helpers.get_data_path(manifest_path), + datasetId = datasetId, + manifest_record_type = 'table', + useSchemaLabel = True, + hideBlanks = True, + restrict_manifest = False, + ) + existing_tables = synapse_store.get_table_info(projectId = projectId) + + # Query table for DaystoFollowUp column + tableId = existing_tables[table_name] + daysToFollowUp = synapse_store.syn.tableQuery( + f"SELECT {column_of_interest} FROM {tableId}" + ).asDataFrame().squeeze() + + # assert Days to FollowUp == 73 + assert (daysToFollowUp == '73.0').all() + + # Associate replacement manifest with files + manifestId = synapse_store.associateMetadataWithFiles( + schemaGenerator = sg, + metadataManifestPath = helpers.get_data_path(replacement_manifest_path), + datasetId = datasetId, + manifest_record_type = 'table', + useSchemaLabel = True, + hideBlanks = True, + restrict_manifest = False, + ) + existing_tables = synapse_store.get_table_info(projectId = projectId) + + # Query table for DaystoFollowUp column + tableId = existing_tables[table_name] + daysToFollowUp = synapse_store.syn.tableQuery( + f"SELECT {column_of_interest} FROM {tableId}" + ).asDataFrame().squeeze() + + # assert Days to FollowUp == 89 now and not 73 + assert (daysToFollowUp == '89').all() + # delete table + synapse_store.syn.delete(tableId)