Skip to content

Commit

Permalink
Merge pull request #1394 from Sage-Bionetworks/develop-FDS-1446
Browse files Browse the repository at this point in the history
Add synapse cache as a parameter to SynapseStorage
  • Loading branch information
andrewelamb authored Apr 10, 2024
2 parents d01c201 + 1f27702 commit 00b28f2
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 42 deletions.
93 changes: 59 additions & 34 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""Synapse storage class"""

import atexit
from collections import OrderedDict
from copy import deepcopy
from datetime import datetime, timedelta
from dataclasses import dataclass
import json
import logging
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -40,17 +39,15 @@
)
from synapseclient.entity import File
from synapseclient.table import CsvFileTable, build_table, Schema
from synapseclient.annotations import from_synapse_annotations
from synapseclient.core.exceptions import (
SynapseHTTPError,
SynapseAuthenticationError,
SynapseUnmetAccessRestrictions,
SynapseHTTPError,
)
import synapseutils
from synapseutils.copy_functions import changeFileMetaData

import uuid

from schematic_db.rdb.synapse_database import SynapseDatabase

from schematic.schemas.data_model_graph import DataModelGraphExplorer
Expand All @@ -71,7 +68,7 @@
from schematic.utils.schema_utils import get_class_label_from_display_name

from schematic.store.base import BaseStorage
from schematic.exceptions import MissingConfigValueError, AccessCredentialsError
from schematic.exceptions import AccessCredentialsError
from schematic.configuration.configuration import CONFIG

logger = logging.getLogger("Synapse storage")
Expand Down Expand Up @@ -199,34 +196,38 @@ def __init__(
token: Optional[str] = None, # optional parameter retrieved from browser cookie
access_token: Optional[str] = None,
project_scope: Optional[list] = None,
synapse_cache_path: Optional[str] = None,
) -> None:
"""Initializes a SynapseStorage object.
Args:
syn: an object of type synapseclient.
token: optional token parameter (typically a 'str') as found in browser cookie upon login to synapse.
access_token: optional access token (personal or oauth)
TODO: move away from specific project setup and work with an interface that Synapse specifies (e.g. based on schemas).
Exceptions:
KeyError: when the 'storage' config object is missing values for essential keys.
AttributeError: when the 'storageFileview' attribute (of class SynapseStorage) does not have a value associated with it.
synapseclient.core.exceptions.SynapseHTTPError: check if the current user has permission to access the Synapse entity.
ValueError: when Admin fileview cannot be found (describe further).
Typical usage example:
syn_store = SynapseStorage()
"""
# TODO: turn root_synapse_cache to a parameter in init
self.syn = self.login(token, access_token)
token (Optional[str], optional):
Optional token parameter as found in browser cookie upon login to synapse.
Defaults to None.
access_token (Optional[str], optional):
Optional access token (personal or oauth).
Defaults to None.
project_scope (Optional[list], optional): Defaults to None.
synapse_cache_path (Optional[str], optional):
Location of synapse cache.
Defaults to None.
"""
self.syn = self.login(synapse_cache_path, token, access_token)
self.project_scope = project_scope
self.storageFileview = CONFIG.synapse_master_fileview_id
self.manifest = CONFIG.synapse_manifest_basename
self.root_synapse_cache = "/root/.synapseCache"
self.root_synapse_cache = self.syn.cache.cache_root_dir
self._query_fileview()

def _purge_synapse_cache(self, maximum_storage_allowed_cache_gb=1):
def _purge_synapse_cache(
self, maximum_storage_allowed_cache_gb: int = 1, minute_buffer: int = 15
) -> None:
"""
Purge synapse cache if it exceeds a certain size. Default to 1GB.
Args:
maximum_storage_allowed_cache_gb: the maximum storage allowed before purging cache. Default is 1 GB.
maximum_storage_allowed_cache_gb (int): the maximum storage allowed
before purging cache. Default is 1 GB.
minute_buffer (int): Files created at least this many minutes ago will be deleted
"""
# try clearing the cache
# scan a directory and check size of files
Expand All @@ -238,7 +239,9 @@ def _purge_synapse_cache(self, maximum_storage_allowed_cache_gb=1):
dir_size_bytes = check_synapse_cache_size(directory=self.root_synapse_cache)
# if the cache has reached the maximum allowed size, purge files older than the minute buffer
if dir_size_bytes >= maximum_storage_allowed_cache_bytes:
num_of_deleted_files = clear_synapse_cache(self.syn.cache, minutes=15)
num_of_deleted_files = clear_synapse_cache(
self.syn.cache, minutes=minute_buffer
)
logger.info(
f"{num_of_deleted_files} files have been deleted from {self.root_synapse_cache}"
)
Expand All @@ -265,30 +268,52 @@ def _query_fileview(self):
raise AccessCredentialsError(self.storageFileview)

@staticmethod
def login(token=None, access_token=None):
def login(
synapse_cache_path: Optional[str] = None,
token: Optional[str] = None,
access_token: Optional[str] = None,
) -> synapseclient.Synapse:
"""Login to Synapse
Args:
token (Optional[str], optional): A Synapse token. Defaults to None.
access_token (Optional[str], optional): A synapse access token. Defaults to None.
synapse_cache_path (Optional[str]): location of synapse cache
Raises:
ValueError: If unable to login with token
ValueError: If unable to login with access token
Returns:
synapseclient.Synapse: A Synapse object that is logged in
"""
# If no token is provided, try retrieving access token from environment
if not token and not access_token:
access_token = os.getenv("SYNAPSE_ACCESS_TOKEN")

# login using a token
if token:
syn = synapseclient.Synapse()

syn = synapseclient.Synapse(cache_root_dir=synapse_cache_path)
try:
syn.login(sessionToken=token, silent=True)
except synapseclient.core.exceptions.SynapseHTTPError:
raise ValueError("Please make sure you are logged into synapse.org.")
except SynapseHTTPError as exc:
raise ValueError(
"Please make sure you are logged into synapse.org."
) from exc
elif access_token:
try:
syn = synapseclient.Synapse()
syn = synapseclient.Synapse(cache_root_dir=synapse_cache_path)
syn.default_headers["Authorization"] = f"Bearer {access_token}"
except synapseclient.core.exceptions.SynapseHTTPError:
except SynapseHTTPError as exc:
raise ValueError(
"No access to resources. Please make sure that your token is correct"
)
) from exc
else:
# login using synapse credentials provided by user in .synapseConfig (default) file
syn = synapseclient.Synapse(configPath=CONFIG.synapse_configuration_path)
syn = synapseclient.Synapse(
configPath=CONFIG.synapse_configuration_path,
cache_root_dir=synapse_cache_path,
)
syn.login(silent=True)
return syn

Expand Down
41 changes: 33 additions & 8 deletions tests/test_store.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
"""Tests for store module"""

from __future__ import annotations

import logging
import math
import os
from time import sleep
from unittest.mock import Mock, patch
from unittest.mock import patch
import shutil

import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
from synapseclient import EntityViewSchema, Folder
from synapseclient.core.exceptions import SynapseHTTPError
from synapseclient.entity import File

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_relationships import DataModelRelationships

from schematic.models.metadata import MetadataModel
from schematic.store.base import BaseStorage
from schematic.store.synapse import (
DatasetFileView,
ManifestDownload,
SynapseStorage
)
from schematic.utils.general import check_synapse_cache_size

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -92,11 +94,35 @@ def test_init(self):


class TestSynapseStorage:
def test_init(self, synapse_store):
"Tests the SynapseStorage class"

def test_init(self, synapse_store:SynapseStorage) -> None:
"""Tests SynapseStorage.__init__"""
assert synapse_store.storageFileview == "syn23643253"
assert isinstance(synapse_store.storageFileviewTable, pd.DataFrame)

def test_getFileAnnotations(self, synapse_store):
assert synapse_store.root_synapse_cache.endswith(".synapseCache")

def test__purge_synapse_cache(self) -> None:
    """Tests SynapseStorage._purge_synapse_cache

    Creates a SynapseStorage whose cache lives in a throwaway directory,
    requests a purge with a tiny size limit and a zero-minute buffer, and
    checks that the cache directory is smaller afterwards.
    """
    cache_dir = "test_cache_dir"
    try:
        synapse_store = SynapseStorage(synapse_cache_path=cache_dir)
        size_before_purge = check_synapse_cache_size(
            synapse_store.root_synapse_cache
        )
        synapse_store._purge_synapse_cache(
            maximum_storage_allowed_cache_gb=0.000001,
            minute_buffer=0,
        )
        size_after_purge = check_synapse_cache_size(
            synapse_store.root_synapse_cache
        )
        assert size_before_purge > size_after_purge
    finally:
        # Clean up even when setup or the assertion fails, so a failed run
        # does not leave a stale cache directory behind for later tests.
        shutil.rmtree(cache_dir, ignore_errors=True)

def test_login(self) -> None:
    """Tests SynapseStorage.login

    Checks the cache root of the client returned with no arguments, then
    checks that an explicit cache path is passed through to the client.
    """
    cache_dir = "test_cache_dir"

    synapse_client = SynapseStorage.login()
    assert synapse_client.cache.cache_root_dir.endswith(".synapseCache")

    try:
        synapse_client = SynapseStorage.login(cache_dir)
        assert synapse_client.cache.cache_root_dir == cache_dir
    finally:
        # Clean up even when login or the assertion fails, so a failed run
        # does not leave the custom cache directory behind.
        shutil.rmtree(cache_dir, ignore_errors=True)

def test_getFileAnnotations(self, synapse_store:SynapseStorage) -> None:
expected_dict = {
"author": "bruno, milen, sujay",
"impact": "42.9",
Expand Down Expand Up @@ -437,7 +463,6 @@ def test_get_files_metadata_from_dataset(self, synapse_store):
"entityId": ["syn123", "syn456"],
}


class TestDatasetFileView:
def test_init(self, dataset_id, dataset_fileview, synapse_store):
assert dataset_fileview.datasetId == dataset_id
Expand Down

0 comments on commit 00b28f2

Please sign in to comment.