Merge pull request #1317 from Sage-Bionetworks/develop

Schematic `v23.11.1`
Sage-Bionetworks · Nov 3, 2023 · eea1276 · eea1276
2 parents 6eb54b5 + 07d90e7
commit eea1276
Show file tree

Hide file tree

Showing 23 changed files with 2,602 additions and 2,564 deletions.
diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml
@@ -15,7 +15,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
     env:
-      POETRY_VERSION:  1.2.0
+      POETRY_VERSION:  1.3.0
     strategy:
       fail-fast: false
       matrix:
@@ -85,4 +85,4 @@ jobs:
         if: ${{ false == inputs.perform_benchmarking }}
         run: >
           source .venv/bin/activate;
-          pytest -m "schematic_api and not submission and not rule_benchmark"
+          pytest -m "schematic_api and not submission and not rule_benchmark"
diff --git a/.github/workflows/pdoc.yml b/.github/workflows/pdoc.yml
@@ -27,7 +27,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     env:
-      POETRY_VERSION:  1.2.0
+      POETRY_VERSION:  1.3.0
     strategy:
       matrix:
         python-version: ["3.9", "3.10"]
@@ -93,4 +93,4 @@ jobs:
       url: ${{ steps.deployment.outputs.page_url }}
     steps:
       - id: deployment
-        uses: actions/deploy-pages@v1
+        uses: actions/deploy-pages@v1
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -9,7 +9,7 @@ jobs:
   pypi_release:
     runs-on: ubuntu-latest
     env:
-      POETRY_VERSION:  1.2.0
+      POETRY_VERSION:  1.3.0
     if: github.event_name == 'push' && contains(github.ref, 'refs/tags')
     steps:
       #----------------------------------------------

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -27,7 +27,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
     env:
-      POETRY_VERSION:  1.2.0
+      POETRY_VERSION:  1.3.0
     strategy:
       fail-fast: false
       matrix:
@@ -127,7 +127,7 @@ jobs:
         if: ${{ contains(github.event.head_commit.message, 'runcombos') }}
         run: >
           source .venv/bin/activate;
-          pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
+          pytest --durations=0 --cov-report=term --cov-report=html:htmlcov --cov=schematic/
           -m "not (google_credentials_needed or schematic_api or table_operations)"
 
       - name: Run tests
@@ -137,7 +137,7 @@ jobs:
         if: ${{ false == contains(github.event.head_commit.message, 'runcombos') }}
         run: >
           source .venv/bin/activate;
-          pytest --cov-report=term --cov-report=html:htmlcov --cov=schematic/
+          pytest --durations=0 --cov-report=term --cov-report=html:htmlcov --cov=schematic/
           -m "not (google_credentials_needed or rule_combos or schematic_api or table_operations)"
 
       - name: Upload pytest test results

diff --git a/README.md b/README.md
@@ -59,7 +59,7 @@ Please note we have a [code of conduct](CODE_OF_CONDUCT.md), please follow it in
 ```
 git clone https://github.com/Sage-Bionetworks/schematic.git
 ```
-2. Install `poetry` (version 1.2 or later) using either the [official installer](https://python-poetry.org/docs/#installing-with-the-official-installer) or [pipx](https://python-poetry.org/docs/#installing-with-pipx). If you have an older installation of Poetry, we recommend uninstalling it first. 
+2. Install `poetry` (version 1.3.0 or later) using either the [official installer](https://python-poetry.org/docs/#installing-with-the-official-installer) or [pipx](https://python-poetry.org/docs/#installing-with-pipx). If you have an older installation of Poetry, we recommend uninstalling it first. 
 
 3. Start the virtual environment by doing: 
 ```

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "schematicpy"
-version = "22.3.2"
+version = "23.11.1"
 description = "Package for biomedical data model and metadata ingress management"
 authors = [
     "Milen Nikolov <[email protected]>",
@@ -54,7 +54,7 @@ pygsheets = "^2.0.4"
 PyYAML = "^6.0.0"
 rdflib = "^6.0.0"
 setuptools = "^66.0.0"
-synapseclient = "^2.7.0"
+synapseclient = "^3.1.1"
 tenacity = "^8.0.1"
 toml = "^0.10.2"
 Flask = "^2.0.0"
@@ -70,7 +70,7 @@ Flask-Cors = "^3.0.10"
 pdoc = "^12.2.0"
 dateparser = "^1.1.4"
 pandarallel = "^1.6.4"
-schematic-db = {version = "^0.0.29", extras = ["synapse"]}
+schematic-db = {version = "0.0.dev33", extras = ["synapse"]}
 pyopenssl = "^23.0.0"
 typing-extensions = "<4.6.0"
 

diff --git a/schematic/manifest/generator.py b/schematic/manifest/generator.py
@@ -54,7 +54,11 @@ def __init__(
         self.creds = services_creds["creds"]
 
         # schema root
-        self.root = root
+        if root:
+            self.root = root
+        # Raise an error if no DataType has been provided
+        else:
+            raise ValueError("No DataType has been provided.")
 
         # alphabetize valid values
         self.alphabetize = alphabetize_valid_values
@@ -79,12 +83,19 @@ def __init__(
 
         # additional metadata to add to manifest
         self.additional_metadata = additional_metadata
+
+        # Check if the class is in the schema
+        root_in_schema = self.sg.se.is_class_in_schema(self.root)
+
+        # If the class could not be found, give a notification
+        if not root_in_schema:
+            exception_message = f"The DataType entered ({self.root}) could not be found in the data model schema. " + \
+                                "Please confirm that the datatype is in the data model and that the spelling matches the class label in the .jsonld file."
+            raise LookupError(exception_message) 
 
         # Determine whether current data type is file-based
-        is_file_based = False
-        if self.root:
-            is_file_based = "Filename" in self.sg.get_node_dependencies(self.root)
-        self.is_file_based = is_file_based
+        self.is_file_based = "Filename" in self.sg.get_node_dependencies(self.root)
+
 
     def _attribute_to_letter(self, attribute, manifest_fields):
         """Map attribute to column letter in a google sheet"""

diff --git a/schematic/models/metadata.py b/schematic/models/metadata.py
@@ -186,7 +186,7 @@ def get_component_requirements(
 
     # TODO: abstract validation in its own module
     def validateModelManifest(
-        self, manifestPath: str, rootNode: str, restrict_rules: bool = False, jsonSchema: str = None, project_scope: List = None,
+        self, manifestPath: str, rootNode: str, restrict_rules: bool = False, jsonSchema: str = None, project_scope: List = None, access_token: str = None,
     ) -> List[str]:
         """Check if provided annotations manifest dataframe satisfies all model requirements.
 
@@ -251,7 +251,16 @@ def validateModelManifest(
 
             return errors, warnings
 
-        errors, warnings, manifest = validate_all(self, errors, warnings, manifest, manifestPath, self.sg, jsonSchema, restrict_rules, project_scope)
+        errors, warnings, manifest = validate_all(self, 
+                                                  errors=errors, 
+                                                  warnings=warnings, 
+                                                  manifest=manifest, 
+                                                  manifestPath=manifestPath, 
+                                                  sg=self.sg, 
+                                                  jsonSchema=jsonSchema, 
+                                                  restrict_rules=restrict_rules, 
+                                                  project_scope=project_scope, 
+                                                  access_token=access_token)
         return errors, warnings
 
     def populateModelManifest(self, title, manifestPath: str, rootNode: str, return_excel = False) -> str:
@@ -328,7 +337,7 @@ def submit_metadata_manifest(
 
             # automatic JSON schema generation and validation with that JSON schema
             val_errors, val_warnings = self.validateModelManifest(
-                manifestPath=manifest_path, rootNode=validate_component, restrict_rules=restrict_rules, project_scope=project_scope,
+                manifestPath=manifest_path, rootNode=validate_component, restrict_rules=restrict_rules, project_scope=project_scope, access_token=access_token
             )
 
             # if there are no errors in validation process

diff --git a/schematic/models/validate_attribute.py b/schematic/models/validate_attribute.py
@@ -27,6 +27,7 @@
                                             rule_in_rule_list,
                                             )
 
+from synapseclient.core.exceptions import SynapseNoCredentialsError
 
 logger = logging.getLogger(__name__)
 
@@ -564,13 +565,18 @@ class ValidateAttribute(object):
         - Add string length validator
     """
 
-    def get_target_manifests(target_component, project_scope: List):
+    def get_target_manifests(target_component, project_scope: List, access_token: str = None):
         t_manifest_search = perf_counter()
         target_manifest_IDs=[]
         target_dataset_IDs=[]
 
         #login
-        synStore = SynapseStorage(project_scope=project_scope)        
+        try:
+            synStore = SynapseStorage(access_token=access_token, project_scope=project_scope)        
+        except SynapseNoCredentialsError as e:
+            raise ValueError(
+                "No Synapse credentials were provided. Credentials must be provided to utilize cross-manfiest validation functionality."
+                ) from e
 
         #Get list of all projects user has access to
         projects = synStore.getStorageProjects(project_scope=project_scope)
@@ -893,7 +899,7 @@ def url_validation(self, val_rule: str, manifest_col: str, sg: SchemaGenerator,)
         return errors, warnings
 
     def cross_validation(
-        self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, sg: SchemaGenerator,
+        self, val_rule: str, manifest_col: pd.core.series.Series, project_scope: List, sg: SchemaGenerator, access_token: str,
     ) -> List[List[str]]:
         """
         Purpose:
@@ -921,7 +927,7 @@ def cross_validation(
 
 
         #Get IDs of manifests with target component
-        synStore, target_manifest_IDs, target_dataset_IDs = ValidateAttribute.get_target_manifests(target_component,project_scope)
+        synStore, target_manifest_IDs, target_dataset_IDs = ValidateAttribute.get_target_manifests(target_component, project_scope, access_token)
 
         t_cross_manifest = perf_counter()
         #Read each manifest

diff --git a/schematic/models/validate_manifest.py b/schematic/models/validate_manifest.py
@@ -62,7 +62,7 @@ def get_multiple_types_error(
         return ["NA", error_col, error_message, error_val]
 
     def validate_manifest_rules(
-        self, manifest: pd.core.frame.DataFrame, sg: SchemaGenerator, restrict_rules: bool, project_scope: List,
+        self, manifest: pd.core.frame.DataFrame, sg: SchemaGenerator, restrict_rules: bool, project_scope: List, access_token: Optional[str] = None,
     ) -> (pd.core.frame.DataFrame, List[List[str]]):
         """
         Purpose:
@@ -208,7 +208,7 @@ def validate_manifest_rules(
                         manifest[col] = manifest_col
                     elif validation_type.lower().startswith("match"):
                         vr_errors, vr_warnings = validation_method(
-                            self, rule, manifest[col], project_scope, sg,
+                            self, rule, manifest[col], project_scope, sg, access_token
                         )
                     else:
                         vr_errors, vr_warnings = validation_method(
@@ -256,9 +256,9 @@ def validate_manifest_values(self, manifest, jsonSchema, sg
         return errors, warnings
 
 
-def validate_all(self, errors, warnings, manifest, manifestPath, sg, jsonSchema, restrict_rules, project_scope: List):
+def validate_all(self, errors, warnings, manifest, manifestPath, sg, jsonSchema, restrict_rules, project_scope: List, access_token: str):
     vm = ValidateManifest(errors, manifest, manifestPath, sg, jsonSchema)
-    manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules(manifest, sg, restrict_rules, project_scope)
+    manifest, vmr_errors, vmr_warnings = vm.validate_manifest_rules(manifest, sg, restrict_rules, project_scope, access_token)
     if vmr_errors:
         errors.extend(vmr_errors)
     if vmr_warnings:

diff --git a/schematic/schemas/generator.py b/schematic/schemas/generator.py
@@ -340,6 +340,19 @@ def is_node_required(self, node_display_name: str) -> bool:
 
         return node_required
 
+    def get_node_display_name(self, node_label: str, mm_graph: nx.MultiDiGraph) -> str:
+        """Get the display name of a node, falling back to its id when it has none.
+
+        Args:
+            node_label, str: Label of the node to retrieve the display name for;
+                used as the key into `mm_graph.nodes`.
+            mm_graph, nx.MultiDiGraph: Graph whose node attributes are inspected.
+        Returns:
+            node_display_name: The node's "displayName" attribute if present;
+                otherwise the second ':'-separated segment of its "id" attribute.
+        Raises:
+            IndexError: If the fallback "id" value contains no ':' separator.
+        """
+        if "displayName" in mm_graph.nodes[node_label]:
+                    node_display_name = mm_graph.nodes[node_label]["displayName"]
+        else:
+            # presumably ids look like "<prefix>:<label>" and only the label part is wanted -- TODO confirm
+            node_display_name = mm_graph.nodes[node_label]["id"].split(':')[1]
+        return node_display_name
+
     def get_nodes_display_names(
         self, node_list: List[str], mm_graph: nx.MultiDiGraph
     ) -> List[str]:
@@ -349,11 +362,10 @@ def get_nodes_display_names(
             node_list: List of nodes whose display names we need to retrieve.
 
         Returns:
-            List of display names.
+            List of display names, return id if no display name
         """
-        node_list_display_names = [
-            mm_graph.nodes[node]["displayName"] for node in node_list
-        ]
+
+        node_list_display_names = [self.get_node_display_name(node, mm_graph) for node in node_list]
 
         return node_list_display_names
 
@@ -430,8 +442,12 @@ def is_required(self, node_name: str, mm_graph: nx.MultiDiGraph) -> bool:
             Boolean value indicating if the node is required or not.
                 True: yes, it is required.
                 False: no, it is not required.
+            Return False, if no required key
         """
-        return mm_graph.nodes[node_name]["required"]
+        if "required" in mm_graph.nodes[node_name]:
+            return mm_graph.nodes[node_name]["required"]
+        else:
+            return False
 
     def get_json_schema_requirements(self, source_node: str, schema_name: str) -> Dict:
         """Consolidated method that aims to gather dependencies and value constraints across terms / nodes in a schema.org schema and store them in a jsonschema /JSON Schema schema.
@@ -505,7 +521,7 @@ def get_json_schema_requirements(self, source_node: str, schema_name: str) -> Di
                 )
 
                 # get process node display name
-                node_display_name = mm_graph.nodes[process_node]["displayName"]
+                node_display_name = self.get_node_display_name(node_label=process_node, mm_graph=mm_graph)
 
                 # updating map between node and node's valid values
                 for n in node_range_d: