diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index d6eba97de..c05af157f 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -1,9 +1,8 @@ +"""Synapse storage class""" + import atexit -from collections import OrderedDict from copy import deepcopy -from datetime import datetime, timedelta from dataclasses import dataclass -import json import logging import numpy as np import pandas as pd @@ -40,17 +39,15 @@ ) from synapseclient.entity import File from synapseclient.table import CsvFileTable, build_table, Schema -from synapseclient.annotations import from_synapse_annotations from synapseclient.core.exceptions import ( SynapseHTTPError, SynapseAuthenticationError, SynapseUnmetAccessRestrictions, + SynapseHTTPError, ) import synapseutils from synapseutils.copy_functions import changeFileMetaData -import uuid - from schematic_db.rdb.synapse_database import SynapseDatabase from schematic.schemas.data_model_graph import DataModelGraphExplorer @@ -71,7 +68,7 @@ from schematic.utils.schema_utils import get_class_label_from_display_name from schematic.store.base import BaseStorage -from schematic.exceptions import MissingConfigValueError, AccessCredentialsError +from schematic.exceptions import AccessCredentialsError from schematic.configuration.configuration import CONFIG logger = logging.getLogger("Synapse storage") @@ -199,34 +196,38 @@ def __init__( token: Optional[str] = None, # optional parameter retrieved from browser cookie access_token: Optional[str] = None, project_scope: Optional[list] = None, + synapse_cache_path: Optional[str] = None, ) -> None: """Initializes a SynapseStorage object. + Args: - syn: an object of type synapseclient. - token: optional token parameter (typically a 'str') as found in browser cookie upon login to synapse. - access_token: optional access token (personal or oauth) - TODO: move away from specific project setup and work with an interface that Synapse specifies (e.g. based on schemas). 
- Exceptions: - KeyError: when the 'storage' config object is missing values for essential keys. - AttributeError: when the 'storageFileview' attribute (of class SynapseStorage) does not have a value associated with it. - synapseclient.core.exceptions.SynapseHTTPError: check if the current user has permission to access the Synapse entity. - ValueError: when Admin fileview cannot be found (describe further). - Typical usage example: - syn_store = SynapseStorage() - """ - # TODO: turn root_synapse_cache to a parameter in init - self.syn = self.login(token, access_token) + token (Optional[str], optional): + Optional token parameter as found in browser cookie upon login to synapse. + Defaults to None. + access_token (Optional[str], optional): + Optional access token (personal or oauth). + Defaults to None. + project_scope (Optional[list], optional): Defaults to None. + synapse_cache_path (Optional[str], optional): + Location of synapse cache. + Defaults to None. + """ + self.syn = self.login(synapse_cache_path, token, access_token) self.project_scope = project_scope self.storageFileview = CONFIG.synapse_master_fileview_id self.manifest = CONFIG.synapse_manifest_basename - self.root_synapse_cache = "/root/.synapseCache" + self.root_synapse_cache = self.syn.cache.cache_root_dir self._query_fileview() - def _purge_synapse_cache(self, maximum_storage_allowed_cache_gb=1): + def _purge_synapse_cache( + self, maximum_storage_allowed_cache_gb: int = 1, minute_buffer: int = 15 + ) -> None: """ Purge synapse cache if it exceeds a certain size. Default to 1GB. Args: - maximum_storage_allowed_cache_gb: the maximum storage allowed before purging cache. Default is 1 GB. 
+ minute_buffer (int): Files created at least this many minutes ago will be deleted """ # try clearing the cache # scan a directory and check size of files @@ -238,7 +239,9 @@ def _purge_synapse_cache(self, maximum_storage_allowed_cache_gb=1): dir_size_bytes = check_synapse_cache_size(directory=self.root_synapse_cache) # if 1 GB has already been taken, purge cache before 15 min if dir_size_bytes >= maximum_storage_allowed_cache_bytes: - num_of_deleted_files = clear_synapse_cache(self.syn.cache, minutes=15) + num_of_deleted_files = clear_synapse_cache( + self.syn.cache, minutes=minute_buffer + ) logger.info( f"{num_of_deleted_files} files have been deleted from {self.root_synapse_cache}" ) @@ -265,30 +268,52 @@ def _query_fileview(self): raise AccessCredentialsError(self.storageFileview) @staticmethod - def login(token=None, access_token=None): + def login( + synapse_cache_path: Optional[str] = None, + token: Optional[str] = None, + access_token: Optional[str] = None, + ) -> synapseclient.Synapse: + """Login to Synapse + + Args: + token (Optional[str], optional): A Synapse token. Defaults to None. + access_token (Optional[str], optional): A synapse access token. Defaults to None. 
+ synapse_cache_path (Optional[str]): location of synapse cache + + Raises: + ValueError: If unable to log in with token + ValueError: If unable to log in with access token + + Returns: + synapseclient.Synapse: A Synapse object that is logged in + """ # If no token is provided, try retrieving access token from environment if not token and not access_token: access_token = os.getenv("SYNAPSE_ACCESS_TOKEN") # login using a token if token: - syn = synapseclient.Synapse() - + syn = synapseclient.Synapse(cache_root_dir=synapse_cache_path) try: syn.login(sessionToken=token, silent=True) - except synapseclient.core.exceptions.SynapseHTTPError: - raise ValueError("Please make sure you are logged into synapse.org.") + except SynapseHTTPError as exc: + raise ValueError( + "Please make sure you are logged into synapse.org." + ) from exc elif access_token: try: - syn = synapseclient.Synapse() + syn = synapseclient.Synapse(cache_root_dir=synapse_cache_path) syn.default_headers["Authorization"] = f"Bearer {access_token}" - except synapseclient.core.exceptions.SynapseHTTPError: + except SynapseHTTPError as exc: raise ValueError( "No access to resources. 
Please make sure that your token is correct" - ) + ) from exc else: # login using synapse credentials provided by user in .synapseConfig (default) file - syn = synapseclient.Synapse(configPath=CONFIG.synapse_configuration_path) + syn = synapseclient.Synapse( + configPath=CONFIG.synapse_configuration_path, + cache_root_dir=synapse_cache_path, + ) syn.login(silent=True) return syn diff --git a/tests/test_store.py b/tests/test_store.py index 9b3048f9b..60e1eeb54 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1,28 +1,30 @@ +"""Tests for store module""" + from __future__ import annotations import logging import math import os from time import sleep -from unittest.mock import Mock, patch +from unittest.mock import patch +import shutil import pandas as pd import pytest from pandas.testing import assert_frame_equal from synapseclient import EntityViewSchema, Folder -from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.entity import File from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer -from schematic.schemas.data_model_relationships import DataModelRelationships -from schematic.models.metadata import MetadataModel from schematic.store.base import BaseStorage from schematic.store.synapse import ( DatasetFileView, ManifestDownload, + SynapseStorage ) +from schematic.utils.general import check_synapse_cache_size logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -92,11 +94,35 @@ def test_init(self): class TestSynapseStorage: - def test_init(self, synapse_store): + "Tests the SynapseStorage class" + + def test_init(self, synapse_store:SynapseStorage) -> None: + """Tests SynapseStorage.__init__""" assert synapse_store.storageFileview == "syn23643253" assert isinstance(synapse_store.storageFileviewTable, pd.DataFrame) - - def test_getFileAnnotations(self, synapse_store): + assert 
synapse_store.root_synapse_cache.endswith(".synapseCache") + + def test__purge_synapse_cache(self) -> None: + """Tests SynapseStorage._purge_synapse_cache""" + synapse_store = SynapseStorage(synapse_cache_path="test_cache_dir") + size_before_purge = check_synapse_cache_size(synapse_store.root_synapse_cache) + synapse_store._purge_synapse_cache( + maximum_storage_allowed_cache_gb=0.000001, + minute_buffer=0 + ) + size_after_purge = check_synapse_cache_size(synapse_store.root_synapse_cache) + assert size_before_purge > size_after_purge + shutil.rmtree("test_cache_dir") + + def test_login(self) -> None: + """Tests SynapseStorage.login""" + synapse_client = SynapseStorage.login() + assert synapse_client.cache.cache_root_dir.endswith(".synapseCache") + synapse_client = SynapseStorage.login("test_cache_dir") + assert synapse_client.cache.cache_root_dir == "test_cache_dir" + shutil.rmtree("test_cache_dir") + + def test_getFileAnnotations(self, synapse_store:SynapseStorage) -> None: expected_dict = { "author": "bruno, milen, sujay", "impact": "42.9", @@ -437,7 +463,6 @@ def test_get_files_metadata_from_dataset(self, synapse_store): "entityId": ["syn123", "syn456"], } - class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): assert dataset_fileview.datasetId == dataset_id