Skip to content

Commit

Permalink
Merge pull request #1046 from Sage-Bionetworks/develop-table-upsert
Browse files Browse the repository at this point in the history
Introduce tests to cover current table operations: creation and replacement
  • Loading branch information
GiaJordan authored Jan 9, 2023
2 parents dc123b4 + d268af1 commit 37563a1
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 13 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
env:
POETRY_VERSION: 1.2.0rc1
strategy:
fail-fast: false
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]

Expand Down Expand Up @@ -108,7 +109,7 @@ jobs:
run: >
source .venv/bin/activate;
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
-m "not google_credentials_needed"
-m "not (google_credentials_needed or table_operations)"
- name: Run tests
env:
Expand All @@ -118,7 +119,7 @@ jobs:
run: >
source .venv/bin/activate;
pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
-m "not (google_credentials_needed or rule_combos or schematic_api)"
-m "not (google_credentials_needed or rule_combos or schematic_api or table_operations)"
- name: Upload pytest test results
uses: actions/upload-artifact@v2
Expand Down
12 changes: 11 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,15 @@ markers = [
"""\
schematic_api: marks tests requiring \
running API locally (skipped on GitHub CI)
"""
""",
"""\
rule_combos: marks tests covering \
combinations of rules that aren't always necessary \
and can add significantly to CI runtime (skipped on GitHub CI unless prompted to run in commit message)
""",
"""\
table_operations: marks tests covering \
table operations that pass locally \
but fail on CI due to interactions with Synapse (skipped on GitHub CI)
"""
]
33 changes: 27 additions & 6 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# allows specifying explicit variable types
from typing import Dict, List, Tuple, Sequence, Union
from collections import OrderedDict
from tenacity import retry, stop_after_attempt, wait_chain, wait_fixed, retry_if_exception_type

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -93,6 +94,9 @@ def __init__(
except KeyError:
raise MissingConfigValueError(("synapse", "manifest_basename"))

self._query_fileview()

def _query_fileview(self):
try:
self.storageFileview = CONFIG["synapse"]["master_fileview"]
self.manifest = CONFIG["synapse"]["manifest_basename"]
Expand All @@ -108,7 +112,7 @@ def __init__(
except AttributeError:
raise AttributeError("storageFileview attribute has not been set.")
except SynapseHTTPError:
raise AccessCredentialsError(self.storageFileview)
raise AccessCredentialsError(self.storageFileview)

@staticmethod
def login(token=None, access_token=None, input_token=None):
Expand Down Expand Up @@ -725,16 +729,21 @@ def get_synapse_table(self, synapse_id: str) -> Tuple[pd.DataFrame, CsvFileTable

return df, results

def _get_tables(self, datasetId: str = None) -> List[Table]:
project = self.syn.get(self.getDatasetProject(datasetId))
def _get_tables(self, datasetId: str = None, projectId: str = None) -> List[Table]:
if projectId:
project = projectId
elif datasetId:
project = self.syn.get(self.getDatasetProject(datasetId))

return list(self.syn.getChildren(project, includeTypes=["table"]))

def get_table_info(self, datasetId: str = None) -> List[str]:
def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[str]:
"""Gets the names of the tables in the schema
Can pass in a synID for a dataset or project
Returns:
list[str]: A list of table names
"""
tables = self._get_tables(datasetId)
tables = self._get_tables(datasetId = datasetId, projectId = projectId)
if tables:
return {table["name"]: table["id"] for table in tables}
else:
Expand All @@ -743,7 +752,7 @@ def get_table_info(self, datasetId: str = None) -> List[str]:
@missing_entity_handler
def upload_format_manifest_table(self, se, manifest, datasetId, table_name, restrict, useSchemaLabel):
# Rename the manifest columns to display names to match fileview
table_info = self.get_table_info(datasetId)
table_info = self.get_table_info(datasetId = datasetId)

blacklist_chars = ['(', ')', '.', ' ', '-']
manifest_columns = manifest.columns.tolist()
Expand Down Expand Up @@ -1293,6 +1302,15 @@ def getDatasetAnnotations(
# Force all values as strings
return table.astype(str)

def raise_final_error(retry_state):
return retry_state.outcome.result()

@retry(stop = stop_after_attempt(5),
wait = wait_chain(*[wait_fixed(10) for i in range (2)] +
[wait_fixed(15) for i in range(2)] +
[wait_fixed(20)]),
retry=retry_if_exception_type(LookupError),
retry_error_callback = raise_final_error)
def getDatasetProject(self, datasetId: str) -> str:
"""Get parent project for a given dataset ID.
Expand All @@ -1306,6 +1324,9 @@ def getDatasetProject(self, datasetId: str) -> str:
Returns:
str: The Synapse ID for the parent project.
"""

self._query_fileview()

# Subset main file view
dataset_index = self.storageFileviewTable["id"] == datasetId
dataset_row = self.storageFileviewTable[dataset_index]
Expand Down
19 changes: 17 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from multiprocessing.sharedctypes import Value
import os
import logging
import platform
import sys

import shutil
import pytest
Expand Down Expand Up @@ -66,10 +66,25 @@ def get_schema_explorer(path=None, *paths):

@staticmethod
def get_python_version(self):
version=platform.python_version()
version=sys.version
base_version=".".join(version.split('.')[0:2])

return base_version

@staticmethod
def get_python_project(self):

version = self.get_python_version(Helpers)

python_projects = {
"3.7": "syn47217926",
"3.8": "syn47217967",
"3.9": "syn47218127",
"3.10": "syn47218347",
}

return python_projects[version]

@pytest.fixture(scope="session")
def helpers():
yield Helpers
Expand Down
10 changes: 10 additions & 0 deletions tests/data/mock_manifests/table_manifest.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462.0
FollowUp,73.0,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454.0
10 changes: 10 additions & 0 deletions tests/data/mock_manifests/table_manifest_replacement.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Component,Days to Follow Up,Adverse Event,Progression or Recurrence,Barretts Esophagus Goblet Cells Present,BMI,Cause of Response,Comorbidity,Comorbidity Method of Diagnosis,Days to Adverse Event,Days to Comorbidity,Diabetes Treatment Type,Disease Response,DLCO Ref Predictive Percent,ECOG Performance Status,FEV1 FVC Post Bronch Percent,FEV 1 FVC Pre Bronch Percent,FEV1 Ref Post Bronch Percent,FEV1 Ref Pre Bronch Percent,Height,Hepatitis Sustained Virological Response,HPV Positive Type,Karnofsky Performance Status,Menopause Status,Pancreatitis Onset Year,Reflux Treatment Type,Risk Factor,Risk Factor Treatment,Viral Hepatitis Serologies,Weight,Days to Progression,Days to Progression Free,Days to Recurrence,Progression or Recurrence Anatomic Site,Progression or Recurrence Type,entityId,HTAN Participant ID,Uuid
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249015,455,318e2d8c-83be-4b19-9a9f-5e97ce9c9cf7
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22249915,456,600666ee-1f96-49f8-b848-2be751cb59f9
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22687161,457,1fb32068-7b6c-4d12-8031-96d91229dda0
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22688035,458,964949f3-efe1-47cd-8113-dedb909fd312
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978930,459,c562590f-359a-4ff5-bbd7-47e7ad840a32
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978946,460,a77e0882-7b6f-42a9-9e60-c4d2bfd72bb7
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22978975,461,e7705ed5-ade4-4451-ae26-9c790e35e878
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn22979177,462,2897b73c-e225-4dc3-a093-96f645332ea3
FollowUp,89,,Not Reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,syn21989551,454,26a1cdba-dd37-483a-be6e-d1a4f5a9521b
129 changes: 127 additions & 2 deletions tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
import math
import logging
import pytest
import time
from time import sleep
from tenacity import Retrying, RetryError, stop_after_attempt, wait_random_exponential

import pandas as pd
from synapseclient import EntityViewSchema
from synapseclient import EntityViewSchema, Folder

from schematic.models.metadata import MetadataModel
from schematic.store.base import BaseStorage
from schematic.store.synapse import SynapseStorage, DatasetFileView
from schematic.utils.cli_utils import get_from_config
Expand Down Expand Up @@ -47,6 +48,28 @@ def dataset_fileview_table_tidy(dataset_fileview, dataset_fileview_table):
table = dataset_fileview.tidy_table()
yield table

@pytest.fixture
def version(synapse_store, helpers):

yield helpers.get_python_version(helpers)

@pytest.fixture
def projectId(synapse_store, helpers):
projectId = helpers.get_python_project(helpers)
yield projectId

@pytest.fixture
def datasetId(synapse_store, projectId, helpers):
dataset = Folder(
name = 'Table Test Dataset ' + helpers.get_python_version(helpers),
parent = projectId,
)

datasetId = synapse_store.syn.store(dataset).id
sleep(5)
yield datasetId
synapse_store.syn.delete(datasetId)


def raise_final_error(retry_state):
return retry_state.outcome.result()
Expand Down Expand Up @@ -243,3 +266,105 @@ def test_tidy_table(self, dataset_fileview_table_tidy):
year_value = table.loc[sample_a_row, "YearofBirth"][0]
assert isinstance(year_value, str)
assert year_value == "1980"

@pytest.mark.table_operations
class TestTableOperations:

def test_createTable(self, helpers, synapse_store, config, projectId, datasetId):


# Check if FollowUp table exists if so delete
existing_tables = synapse_store.get_table_info(projectId = projectId)

table_name='followup_synapse_storage_manifest_table'

if table_name in existing_tables.keys():
synapse_store.syn.delete(existing_tables[table_name])
sleep(10)
# assert no table
assert table_name not in synapse_store.get_table_info(projectId = projectId).keys()

# associate metadata with files
manifest_path = "mock_manifests/table_manifest.csv"
inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location")))
sg = SchemaGenerator(inputModelLocaiton)

# updating file view on synapse takes a long time
manifestId = synapse_store.associateMetadataWithFiles(
schemaGenerator = sg,
metadataManifestPath = helpers.get_data_path(manifest_path),
datasetId = datasetId,
manifest_record_type = 'table',
useSchemaLabel = True,
hideBlanks = True,
restrict_manifest = False,
)
existing_tables = synapse_store.get_table_info(projectId = projectId)

# clean Up
synapse_store.syn.delete(manifestId)
# assert table exists
assert table_name in existing_tables.keys()

def test_replaceTable(self, helpers, synapse_store, config, projectId, datasetId):
table_name='followup_synapse_storage_manifest_table'
manifest_path = "mock_manifests/table_manifest.csv"
replacement_manifest_path = "mock_manifests/table_manifest_replacement.csv"
column_of_interest="DaystoFollowUp"

# Check if FollowUp table exists if so delete
existing_tables = synapse_store.get_table_info(projectId = projectId)

if table_name in existing_tables.keys():
synapse_store.syn.delete(existing_tables[table_name])
sleep(10)
# assert no table
assert table_name not in synapse_store.get_table_info(projectId = projectId).keys()

# associate org FollowUp metadata with files
inputModelLocaiton = helpers.get_data_path(get_from_config(config.DATA, ("model", "input", "location")))
sg = SchemaGenerator(inputModelLocaiton)

# updating file view on synapse takes a long time
manifestId = synapse_store.associateMetadataWithFiles(
schemaGenerator = sg,
metadataManifestPath = helpers.get_data_path(manifest_path),
datasetId = datasetId,
manifest_record_type = 'table',
useSchemaLabel = True,
hideBlanks = True,
restrict_manifest = False,
)
existing_tables = synapse_store.get_table_info(projectId = projectId)

# Query table for DaystoFollowUp column
tableId = existing_tables[table_name]
daysToFollowUp = synapse_store.syn.tableQuery(
f"SELECT {column_of_interest} FROM {tableId}"
).asDataFrame().squeeze()

# assert Days to FollowUp == 73
assert (daysToFollowUp == '73.0').all()

# Associate replacement manifest with files
manifestId = synapse_store.associateMetadataWithFiles(
schemaGenerator = sg,
metadataManifestPath = helpers.get_data_path(replacement_manifest_path),
datasetId = datasetId,
manifest_record_type = 'table',
useSchemaLabel = True,
hideBlanks = True,
restrict_manifest = False,
)
existing_tables = synapse_store.get_table_info(projectId = projectId)

# Query table for DaystoFollowUp column
tableId = existing_tables[table_name]
daysToFollowUp = synapse_store.syn.tableQuery(
f"SELECT {column_of_interest} FROM {tableId}"
).asDataFrame().squeeze()

# assert Days to FollowUp == 89 now and not 73
assert (daysToFollowUp == '89').all()
# delete table
synapse_store.syn.delete(tableId)

0 comments on commit 37563a1

Please sign in to comment.