Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update search, so it works with the new after syntax. #456

Merged
merged 10 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions src/cript/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,35 +787,32 @@ def search(
node_type = node_type.node_type_snake_case

api_endpoint: str = ""
page_number: Union[int, None] = None

limit_node_fetches: Optional[int] = None
if search_mode == SearchModes.NODE_TYPE:
api_endpoint = f"/search/{node_type}"
page_number = 0
value_to_search = ""

elif search_mode == SearchModes.CONTAINS_NAME:
api_endpoint = f"/search/{node_type}"
page_number = 0

elif search_mode == SearchModes.EXACT_NAME:
api_endpoint = f"/search/exact/{node_type}"
page_number = None
limit_node_fetches = 1

elif search_mode == SearchModes.UUID:
api_endpoint = f"/{node_type}/{value_to_search}"
# putting the value_to_search in the URL instead of a query
value_to_search = ""
page_number = None
limit_node_fetches = 1

elif search_mode == SearchModes.BIGSMILES:
api_endpoint = "/search/bigsmiles/"
page_number = 0

# error handling if none of the API endpoints got hit
else:
raise RuntimeError("Internal Error: Failed to recognize any search modes. Please report this bug on https://github.com/C-Accel-CRIPT/Python-SDK/issues.")

return Paginator(api=self, url_path=api_endpoint, page_number=page_number, query=value_to_search)
return Paginator(api=self, url_path=api_endpoint, query=value_to_search, limit_node_fetches=limit_node_fetches)

def delete(self, node) -> None:
"""
Expand Down
168 changes: 90 additions & 78 deletions src/cript/api/paginator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Dict, Union
from typing import Dict, Optional, Tuple
from urllib.parse import quote

import requests
Expand All @@ -9,6 +9,16 @@
from cript.nodes.util import load_nodes_from_json


def _get_uuid_score_from_json(node_dict: Dict) -> Tuple[str, Optional[float]]:
"""
Get the UUID string and search score from a JSON node representation if available.
"""
node_uuid: str = node_dict["uuid"]
node_score: Optional[float] = node_dict.get("score", None)

return node_uuid, node_score


class Paginator:
"""
Paginator is used to flip through different pages of data that the API returns when searching.
Expand All @@ -29,22 +39,17 @@ class Paginator:

_url_path: str
_query: str
_initial_page_number: Union[int, None]
_current_position: int
_fetched_nodes: list
_uuid_search_score_map: Dict
_number_fetched_pages: int = 0
_limit_page_fetches: Union[int, None] = None
_num_skip_pages: int = 0
_limit_node_fetches: Optional[int] = None
_start_after_uuid: Optional[str] = None
_start_after_score: Optional[float] = None
auto_load_nodes: bool = True

@beartype
def __init__(
self,
api,
url_path: str,
page_number: Union[int, None],
query: str,
):
def __init__(self, api, url_path: str, query: str, limit_node_fetches: Optional[int] = None):
"""
create a paginator

Expand All @@ -53,34 +58,33 @@ def __init__(

Parameters
----------
http_headers: dict
get already created http headers from API and just use them in paginator
api_endpoint: str
api endpoint to send the search requests to
it already contains what node the user is looking for
current_page_number: int
page number to start from. Keep track of current page for user to flip back and forth between pages of data
api: cript.API
Object through which the API call is routed.
url_path: str
query URL used.
query: str
the value the user is searching for
limit_node_fetches: Optional[int] = None
limits the number of nodes fetched through this call.

Returns
-------
None
instantiate a paginator
"""
self._api = api
self._initial_page_number = page_number
self._number_fetched_pages = 0
self._fetched_nodes = []
self._current_position = 0
self._limit_node_fetches = limit_node_fetches
self._uuid_search_score_map = {}

# check if it is a string and not None to avoid AttributeError
try:
self._url_path = quote(url_path.rstrip("/").strip())
self._url_path = url_path.rstrip("/").strip()
except Exception as exc:
raise RuntimeError(f"Invalid type for api_endpoint {self._url_path} for a paginator.") from exc

self._query = quote(query)
self._query = query

@beartype
def _fetch_next_page(self) -> None:
Expand All @@ -105,16 +109,36 @@ def _fetch_next_page(self) -> None:
None
"""

# Check if we are supposed to fetch more pages
if self._limit_page_fetches and self._number_fetched_pages >= self._limit_page_fetches:
raise StopIteration

# Composition of the query URL
temp_url_path: str = self._url_path
temp_url_path += f"/?q={self._query}"
if self._initial_page_number is not None:
temp_url_path += f"&page={self.page_number}"
self._number_fetched_pages += 1
temp_url_path: str = self._url_path + "/"

query_list = []

if len(self._query) > 0:
query_list += [f"q={self._query}"]

if self._limit_node_fetches is None or self._limit_node_fetches > 1: # This limits these parameters
if self._start_after_uuid is not None:
query_list += [f"after={self._start_after_uuid}"]
if self._start_after_score is not None: # Always None for none BigSMILES searches
query_list += [f"score={self._start_after_score}"]

# Reset to allow normal search to continue
self._start_after_uuid = None
self._start_after_score = None

elif len(self._fetched_nodes) > 0: # Use known last element
node_uuid, node_score = _get_uuid_score_from_json(self._fetched_nodes[-1])
query_list += [f"after={node_uuid}"]
if node_score is not None:
query_list += [f"score={node_score}"]

for i, query in enumerate(query_list):
if i == 0:
temp_url_path += "?"
else:
temp_url_path += "&"
temp_url_path += quote(query, safe="/=&?")

response: requests.Response = self._api._capsule_request(url_path=temp_url_path, method="GET")

Expand Down Expand Up @@ -153,18 +177,18 @@ def _fetch_next_page(self) -> None:
self._fetched_nodes += json_list

def __next__(self):
if self._limit_node_fetches and self._current_position >= self._limit_node_fetches:
raise StopIteration

if self._current_position >= len(self._fetched_nodes):
# Without a page number argument, we can only fetch once.
if self._initial_page_number is None and self._number_fetched_pages > 0:
raise StopIteration
self._fetch_next_page()

try:
next_node_json = self._fetched_nodes[self._current_position - 1]
except IndexError: # This is not a random access iteration.
except IndexError as exc: # This is not a random access iteration.
# So if fetching a next page wasn't enough to get the index inbound,
# The iteration stops
raise StopIteration
raise StopIteration from exc

if self.auto_load_nodes:
return_data = load_nodes_from_json(next_node_json)
Expand All @@ -181,65 +205,53 @@ def __iter__(self):
self._current_position = 0
return self

@property
def page_number(self) -> Union[int, None]:
"""Obtain the current page number the paginator is fetching next.

Returns
-------
int
positive number of the next page this paginator is fetching.
None
if no page number is associated with the pagination
"""
page_number = self._num_skip_pages + self._number_fetched_pages
if self._initial_page_number is not None:
page_number += self._initial_page_number
return page_number

@beartype
def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None:
def limit_node_fetches(self, max_num_nodes: Optional[int]) -> None:
"""Limit pagination to a maximum number of nodes.

This can be used for very large searches with the paginator, so the search can be split into
smaller portions.

Parameters
----------
max_num_pages: Union[int, None],
max_num_nodes: Optional[int],
positive integer with the maximum number of node fetches,
or None, indicating that an unlimited number of node fetches is permitted.
"""
self._limit_page_fetches = max_num_pages
self._limit_node_fetches = max_num_nodes

def skip_pages(self, skip_pages: int) -> int:
"""Skip pages in the pagination.

Warning this function is advanced usage and may not produce the results you expect.
In particular, every search is different, even if we search for the same values there is
no guarantee that the results are in the same order. (And results can change if data is
added or removed from CRIPT.) So if you break up your search with `limit_page_fetches` and
`skip_pages` there is no guarantee that it is the same as one continuous search.
If the paginator associated search does not accept pages, there is no effect.
@beartype
def start_after_uuid(self, start_after_uuid: str, start_after_score: Optional[float] = None):
"""
This can be used to continue a search from a last known node.

Parameters
----------
skip_pages:int
Number of pages that the paginator skips now before fetching the next page.
The parameter is added to the internal state, so repeated calls skip more pages.
start_after_uuid: str
UUID string of the last node from a previous search
start_after_score: float
required for BigSMILES searches, the last score from a BigSMILES search.
Must be None if not a BigSMILES search.

Returns
-------
int
The number this paginator is skipping. Internal skip count.
None
"""
self._start_after_uuid = start_after_uuid
self._start_after_score = start_after_score

Raises
------
RuntimeError
If the total number of skipped pages is negative.
@beartype
def get_bigsmiles_search_score(self, uuid: str):
"""
num_skip_pages = self._num_skip_pages + skip_pages
if self._num_skip_pages < 0:
RuntimeError(f"Invalid number of skipped pages. The total number of pages skipped is negative {num_skip_pages}, requested to skip {skip_pages}.")
self._num_skip_pages = num_skip_pages
return self._num_skip_pages
Get the ranking score for nodes from the BigSMILES search.
Will return None if not a BigSMILES search or raise an Exception.
"""
if uuid not in self._uuid_search_score_map.keys():
start = len(self._uuid_search_score_map.keys())
for node_json in self._fetched_nodes[start:]:
node_uuid, node_score = _get_uuid_score_from_json(node_json)
self._uuid_search_score_map[node_uuid] = node_score
try:
return self._uuid_search_score_map[uuid]
except KeyError as exc:
raise RuntimeError(f"The requested UUID {uuid} is not know from the search. Search scores are limited only to current search.") from exc
4 changes: 4 additions & 0 deletions src/cript/nodes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ def _from_json(cls, json_dict: dict):
pass
else:
arguments[field] = json_dict[field]
try: # TODO remove this hack to work with compatible model versions
del arguments["model_version"]
except KeyError:
pass

# add omitted fields from default (necessary if they are required)
for field_name in [field.name for field in dataclasses.fields(default_dataclass)]:
Expand Down
Loading
Loading