Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: new search modes #291

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .trunk/configs/.cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
"setuptools",
"miniconda",
"pymdown",
"BCDB"
"BCDB",
"chlorobenzamide"
]
}
175 changes: 119 additions & 56 deletions src/cript/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from cript.api.vocabulary_categories import VocabCategories
from cript.nodes.exceptions import CRIPTNodeSchemaError
from cript.nodes.primary_nodes.project import Project
from cript.nodes.uuid_base import UUIDBaseNode

# Do not use this directly! That includes devs.
# Use the `_get_global_cached_api` for access.
Expand Down Expand Up @@ -853,13 +854,7 @@ def download_file(self, file_source: str, destination_path: str = ".") -> None:
# the file is stored in cloud storage and must be retrieved via object_name
self._s3_client.download_file(Bucket=self._BUCKET_NAME, Key=file_source, Filename=destination_path) # type: ignore

@beartype
def search(
self,
node_type,
search_mode: SearchModes,
value_to_search: Optional[str],
) -> Paginator:
def search(self, node_type: UUIDBaseNode, search_mode: SearchModes, value_to_search: Optional[str] = None, parent_node: Optional[UUIDBaseNode] = None) -> Paginator:
"""
This method is used to perform search on the CRIPT platform.

Expand All @@ -868,50 +863,69 @@ def search(

Examples
--------
### Search by Node Type
```python
materials_paginator = cript_api.search(
node_type=cript.Material,
search_mode=cript.SearchModes.NODE_TYPE,
value_to_search=None
)
```

### Search by Contains name
```python
contains_name_paginator = cript_api.search(
node_type=cript.Process,
search_mode=cript.SearchModes.CONTAINS_NAME,
value_to_search="poly"
)
```

### Search by Exact Name
```python
exact_name_paginator = cript_api.search(
node_type=cript.Project,
search_mode=cript.SearchModes.EXACT_NAME,
value_to_search="Sodium polystyrene sulfonate"
)
```

### Search by UUID
```python
uuid_paginator = cript_api.search(
node_type=cript.Collection,
search_mode=cript.SearchModes.UUID,
value_to_search="75fd3ee5-48c2-4fc7-8d0b-842f4fc812b7"
)
```

### Search by BigSmiles
```python
paginator = cript_api.search(
node_type=cript.Material,
search_mode=cript.SearchModes.BIGSMILES,
value_to_search="{[][$]CC(C)(C(=O)OCCCC)[$][]}"
)
```
???+ Example "Search by Node Type"
```python
materials_paginator = cript_api.search(
node_type=cript.Material,
search_mode=cript.SearchModes.NODE_TYPE,
value_to_search=None
)
```

??? Example "Search by Contains name"
```python
contains_name_paginator = cript_api.search(
node_type=cript.Process,
search_mode=cript.SearchModes.CONTAINS_NAME,
value_to_search="poly"
)
```

??? Example "Search by Exact Name"
```python
exact_name_paginator = cript_api.search(
node_type=cript.Project,
search_mode=cript.SearchModes.EXACT_NAME,
value_to_search="Sodium polystyrene sulfonate"
)
```

??? Example "Search by UUID"
```python
uuid_paginator = cript_api.search(
node_type=cript.Collection,
search_mode=cript.SearchModes.UUID,
value_to_search="75fd3ee5-48c2-4fc7-8d0b-842f4fc812b7"
)
```

??? Example "Search by BigSmiles"
```python
paginator = cript_api.search(
node_type=cript.Material,
search_mode=cript.SearchModes.BIGSMILES,
value_to_search="{[][$]CC(C)(C(=O)OCCCC)[$][]}"
)
```

??? Example "Search child node type within parent"
```python
all_materials_in_project_paginator = cript_api.search(
node_type=cript.Material, # the node type you want back
search_mode=cript.SearchModes.CHILD_NODE_TYPE_WITHIN_PARENT, # type of search
parent_node=my_project_node # parent node to search through
)
```

??? Example "Search child with exact name within parent"
```python
materials_exact_name_in_project_paginator = cript_api.search(
node_type=cript.Material,
search_mode=cript.SearchModes.CHILD_WITH_EXACT_NAME_WITHIN_PARENT,
value_to_search="N-Butyl-2-chlorobenzamide",
parent_node=my_project_node
)
```

Parameters
----------
Expand All @@ -922,7 +936,28 @@ def search(
Refer to [valid search modes](../search_modes)
value_to_search : Optional[str]
What you are searching for can be either a value, and if you are only searching for
a `NODE_TYPE`, then this value can be empty or `None`
a `NODE_TYPE` or a search mode that does not take a value
then this value can be empty or `None` because it will not be used
> Not applicable for all search modes
parent_node : Optional[UUIDBaseNode], default None
The parent that you are searching through.
> Not applicable for all search modes

Raises
------
ValueError
Raised when required arguments are missing for a specific search mode, preventing a silent failure.

Certain arguments are specific to different search modes. To ensure smooth API calls and avoid
confusion, this function performs a *pre-call* check for the necessary arguments based on the chosen
search mode. If a required argument is missing, a `ValueError` is raised.

Example:
```python
cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.EXACT_NAME)
```
> This example will raise a `ValueError` since the `value_to_search` argument is missing, which is
required for the `EXACT_NAME` SearchMode.

Returns
-------
Expand All @@ -940,22 +975,50 @@ def search(

# requesting a page of some primary node
if search_mode == SearchModes.NODE_TYPE:
api_endpoint = f"{self._host}/{node_type}"
api_endpoint = f"{self._host}/{node_type}/"
# not using `value_to_search`
value_to_search = None

elif search_mode == SearchModes.CONTAINS_NAME:
api_endpoint = f"{self._host}/search/{node_type}"
if value_to_search is None:
raise ValueError("`value_to_search` is needed for `SearchModes.CONTAINS_NAME`")

api_endpoint = f"{self._host}/search/{node_type}/"

elif search_mode == SearchModes.EXACT_NAME:
api_endpoint = f"{self._host}/search/exact/{node_type}"
if value_to_search is None:
raise ValueError("`value_to_search` is needed for `SearchModes.EXACT_NAME`")

api_endpoint = f"{self._host}/search/exact/{node_type}/"

elif search_mode == SearchModes.UUID:
api_endpoint = f"{self._host}/{node_type}/{value_to_search}"
api_endpoint = f"{self._host}/{node_type}/{value_to_search}/"
# putting the value_to_search in the URL instead of a query
value_to_search = None

elif search_mode == SearchModes.BIGSMILES:
if value_to_search is None:
raise ValueError("`value_to_search` is needed for `SearchModes.BIGSMILES`")

api_endpoint = f"{self._host}/search/bigsmiles/"

elif search_mode == SearchModes.CHILD_NODE_TYPE_WITHIN_PARENT:
if parent_node is None:
raise ValueError("`parent_node` is needed for `SearchModes.CHILD_NODE_TYPE_WITHIN_PARENT`")

api_endpoint = f"{self._host}/{parent_node.node_type}/{parent_node.uuid}/{node_type}/"
# not using `value_to_search`
value_to_search = None

elif search_mode == SearchModes.CHILD_WITH_EXACT_NAME_WITHIN_PARENT:
if value_to_search is None:
raise ValueError("`value_to_search` is needed for `SearchModes.CHILD_WITH_EXACT_NAME_WITHIN_PARENT`")

if parent_node is None:
raise ValueError("`parent_node` is needed for `SearchModes.CHILD_WITH_EXACT_NAME_WITHIN_PARENT`")

api_endpoint = f"{self._host}/search/exact/{parent_node.node_type}/{parent_node.uuid}/{node_type}/"

assert api_endpoint != ""

# TODO error handling if none of the API endpoints got hit
Expand Down
11 changes: 9 additions & 2 deletions src/cript/api/valid_search_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@ class SearchModes(Enum):
UUID : str
Search by node UUID.
BIGSMILES: str
search materials by bigsmiles identifier.
Search materials by bigsmiles identifier.
CHILD_NODE_TYPE_WITHIN_PARENT: str
Search for a node type within a parent.
> Example: Find all the materials within this specific project.
CHILD_WITH_EXACT_NAME_WITHIN_PARENT: str
Search for an exact node name within a parent.
> Example: Search for the materials with that exact name in the project; returns nothing if none match.

Examples
-------
Expand All @@ -34,5 +40,6 @@ class SearchModes(Enum):
EXACT_NAME: str = "exact_name"
CONTAINS_NAME: str = "contains_name"
UUID: str = "uuid"
# UUID_CHILDREN = "uuid_children"
BIGSMILES: str = "bigsmiles"
CHILD_NODE_TYPE_WITHIN_PARENT: str = "child_node_type_within_parent"
CHILD_WITH_EXACT_NAME_WITHIN_PARENT: str = "child_with_exact_name_within_parent"
30 changes: 19 additions & 11 deletions tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,22 +388,30 @@ def test_api_search_uuid(cript_api: cript.API) -> None:


@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
def test_api_search_bigsmiles(cript_api: cript.API) -> None:
def test_api_search_child_node_type_within_parent(cript_api: cript.API, cript_project_node) -> None:
"""
tests search method with bigsmiles SearchMode to see if we just get at least one match
searches for material
"{[][<]C(C)C(=O)O[>][<]}{[$][$]CCC(C)C[$],[$]CC(C(C)C)[$],[$]CC(C)(CC)[$][]}"
tests search with the CHILD_NODE_TYPE_WITHIN_PARENT SearchMode
searches for all materials within a project node
"""
materials_in_project_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.CHILD_NODE_TYPE_WITHIN_PARENT, parent_node=cript_project_node)

assert isinstance(materials_in_project_paginator, Paginator)
assert len(materials_in_project_paginator.current_page_results) >= 1


another good example can be "{[][$]CC(C)(C(=O)OCCCC)[$][]}"
@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
def test_api_search_child_with_exact_name_within_parent(cript_api: cript.API, cript_project_node) -> None:
"""
tests search with the CHILD_WITH_EXACT_NAME_WITHIN_PARENT SearchMode
searches for a material with an exact name within a project node
"""
bigsmiles_search_value = "{[][<]C(C)C(=O)O[>][<]}{[$][$]CCC(C)C[$],[$]CC(C(C)C)[$],[$]CC(C)(CC)[$][]}"
exact_name_to_search = "N-Butyl-2-chlorobenzamide"

bigsmiles_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.BIG_SMILES, value_to_search=bigsmiles_search_value)
materials_exact_name_in_project_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.CHILD_WITH_EXACT_NAME_WITHIN_PARENT, value_to_search=exact_name_to_search, parent_node=cript_project_node)

assert isinstance(bigsmiles_paginator, Paginator)
assert len(bigsmiles_paginator.current_page_results) >= 1
# not sure if this will always be in this position in every server environment, so commenting it out for now
# assert bigsmiles_paginator.current_page_results[1]["name"] == "BCDB_Material_285"
assert isinstance(materials_exact_name_in_project_paginator, Paginator)
assert len(materials_exact_name_in_project_paginator.current_page_results) >= 1
assert materials_exact_name_in_project_paginator.current_page_results[0]["name"] == exact_name_to_search


def test_get_my_user_node_from_api(cript_api: cript.API) -> None:
Expand Down
20 changes: 20 additions & 0 deletions tests/fixtures/primary_nodes.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
import copy
import json
import uuid
from typing import Dict

import pytest
from util import strip_uid_from_dict

import cript
from cript import load_nodes_from_json


@pytest.fixture(scope="function")
def cript_project_node(cript_api: cript.API) -> cript.Project:
    """
    Fetch the project named "cript" from the API and return it as a Project node.

    The project is described by its author as existing on all CRIPT server
    environments.

    Notes
    -----
    Use this when a test needs a real project to run against. A hand-picked
    project may be missing on some CRIPT server environments, or may be
    visible to you but not to other people running the tests, which gives
    them false errors.

    Returns
    -------
    cript.Project
        The first result of an exact-name search for "cript", converted from
        its API dictionary into a node.

    Raises
    ------
    The requesting test fails during fixture setup (via `pytest.fail`) when
    the exact-name search returns no results.
    """
    # get CRIPT project from API
    exact_name_paginator = cript_api.search(node_type=cript.Project, search_mode=cript.SearchModes.EXACT_NAME, value_to_search="cript")
    project_results = exact_name_paginator.current_page_results

    # fail with a readable message instead of an opaque IndexError on `[0]`
    if len(project_results) == 0:
        pytest.fail('exact name search for the "cript" project returned no results, cannot build the `cript_project_node` fixture')

    cript_project_dict: Dict = project_results[0]

    # the loader is given a JSON string, so the result dict is serialized first
    project_node: cript.Project = load_nodes_from_json(json.dumps(cript_project_dict))
    return project_node


@pytest.fixture(scope="function")
Expand Down