Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Advanced pagination #433

Merged
merged 11 commits into from
Mar 11, 2024
26 changes: 17 additions & 9 deletions src/cript/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ class API:
_host: str = ""
_api_token: str = ""
_storage_token: str = ""
_http_headers: dict = {}
_db_schema: Optional[DataSchema] = None
_api_prefix: str = "api"
_api_version: str = "v1"
_api_request_session: Union[None, requests.Session] = None

# trunk-ignore-begin(cspell)
# AWS S3 constants
Expand Down Expand Up @@ -213,9 +213,6 @@ def __init__(self, host: Union[str, None] = None, api_token: Union[str, None] =
self._api_token = api_token # type: ignore
self._storage_token = storage_token # type: ignore

# add Bearer to token for HTTP requests
self._http_headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"}

# set a logger instance to use for the class logs
self._init_logger(default_log_level)

Expand Down Expand Up @@ -322,6 +319,14 @@ def connect(self):
CRIPTConnectionError
raised when the host does not give the expected response
"""

# Establish a requests session object
if self._api_request_session:
self.disconnect()
self._api_request_session = requests.Session()
# add Bearer to token for HTTP requests
self._api_request_session.headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"}

# As a form to check our connection, we pull and establish the data schema
try:
self._db_schema = DataSchema(self)
Expand All @@ -344,6 +349,10 @@ def disconnect(self):

For manual connection: nested API object are discouraged.
"""
# Disconnect request session
if self._api_request_session:
self._api_request_session.close()

# Restore the previously active global API (might be None)
global _global_cached_api
_global_cached_api = self._previous_global_cached_api
Expand Down Expand Up @@ -946,7 +955,7 @@ def delete_node_by_uuid(self, node_type: str, node_uuid: str) -> None:

self.logger.info(f"Deleted '{node_type.title()}' with UUID of '{node_uuid}' from CRIPT API.")

def _capsule_request(self, url_path: str, method: str, api_request: bool = True, headers: Optional[Dict] = None, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response:
def _capsule_request(self, url_path: str, method: str, api_request: bool = True, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response:
"""Helper function that capsules every request call we make against the backend.

Please *always* use this methods instead of `requests` directly.
Expand All @@ -971,9 +980,6 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True,
additional keyword arguments that are passed to `request.request`
"""

if headers is None:
headers = self._http_headers

url: str = self.host
if api_request:
url += f"/{self.api_prefix}/{self.api_version}"
Expand All @@ -985,7 +991,9 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True,
pre_log_message += "..."
self.logger.debug(pre_log_message)

response: requests.Response = requests.request(url=url, method=method, headers=headers, timeout=timeout, **kwargs)
if self._api_request_session is None:
raise CRIPTAPIRequiredError
response: requests.Response = self._api_request_session.request(url=url, method=method, timeout=timeout, **kwargs)
post_log_message: str = f"Request return with {response.status_code}"
if self.extra_api_log_debug_info:
post_log_message += f" {response.text}"
Expand Down
11 changes: 10 additions & 1 deletion src/cript/api/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class CRIPTAPIRequiredError(CRIPTException):
"""
## Definition
Exception to be raised when the API object is requested, but no cript.API object exists yet.
Also make sure to use it in a context manager `with cript.API as api:` or manually call
`connect` and `disconnect`.

The CRIPT Python SDK relies on a cript.API object for creation, validation, and modification of nodes.
The cript.API object may be explicitly called by the user to perform operations to the API, or
Expand All @@ -83,14 +85,21 @@ class CRIPTAPIRequiredError(CRIPTException):
my_token = "123456" # To use your token securely, please consider using environment variables

my_api = cript.API(host=my_host, token=my_token)
my_api.connect()
# Your code
my_api.disconnect()
```
"""

def __init__(self):
pass

def __str__(self) -> str:
error_message = "cript.API object is required for an operation, but it does not exist." "Please instantiate a cript.API object to continue." "See the documentation for more details."
error_message = (
"cript.API object is required for an operation, but it does not exist."
"Please instantiate a cript.API object and connect it to API for example with a context manager `with cript.API() as api:` to continue."
"See the documentation for more details."
)

return error_message

Expand Down
75 changes: 72 additions & 3 deletions src/cript/api/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class Paginator:
_current_position: int
_fetched_nodes: list
_number_fetched_pages: int = 0
_limit_page_fetches: Union[int, None] = None
_num_skip_pages: int = 0
auto_load_nodes: bool = True

@beartype
Expand Down Expand Up @@ -103,11 +105,15 @@ def _fetch_next_page(self) -> None:
None
"""

# Check if we are supposed to fetch more pages
if self._limit_page_fetches and self._number_fetched_pages >= self._limit_page_fetches:
raise StopIteration

# Composition of the query URL
temp_url_path: str = self._url_path
temp_url_path += f"/?q={self._query}"
if self._initial_page_number is not None:
temp_url_path += f"&page={self._initial_page_number + self._number_fetched_pages}"
temp_url_path += f"&page={self.page_number}"
self._number_fetched_pages += 1

response: requests.Response = self._api._capsule_request(url_path=temp_url_path, method="GET")
Expand Down Expand Up @@ -136,9 +142,9 @@ def _fetch_next_page(self) -> None:

if api_response["code"] == 404 and api_response["error"] == "The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.":
current_page_results = []

self._api.logger.debug(f"The paginator hit a 404 HTTP for requesting this {temp_url_path} with GET. We interpret it as no nodes present, but this is brittle at the moment.")
# if API response is not 200 raise error for the user to debug
if api_response["code"] != 200:
elif api_response["code"] != 200:
raise APIError(api_error=str(response.json()), http_method="GET", api_url=temp_url_path)

# Here we only load the JSON into the temporary results.
Expand Down Expand Up @@ -174,3 +180,66 @@ def __next__(self):
def __iter__(self):
self._current_position = 0
return self

@property
def page_number(self) -> Union[int, None]:
"""Obtain the current page number the paginator is fetching next.

Returns
-------
int
positive number of the next page this paginator is fetching.
None
if no page number is associated with the pagination
"""
page_number = self._num_skip_pages + self._number_fetched_pages
if self._initial_page_number is not None:
page_number += self._initial_page_number
return page_number

@beartype
def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None:
"""Limit pagination to a maximum number of pages.

This can be used for very large searches with the paginator, so the search can be split into
smaller portions.

Parameters
----------
max_num_pages: Union[int, None],
positive integer with maximum number of page fetches.
or None, indicating unlimited number of page fetches are permitted.
"""
self._limit_page_fetches = max_num_pages

def skip_pages(self, skip_pages: int) -> int:
"""Skip pages in the pagination.

Warning this function is advanced usage and may not produce the results you expect.
In particular, every search is different, even if we search for the same values there is
no guarantee that the results are in the same order. (And results can change if data is
added or removed from CRIPT.) So if you break up your search with `limit_page_fetches` and
`skip_pages` there is no guarantee that it is the same as one continuous search.
If the paginator associated search does not accept pages, there is no effect.

Parameters
----------
skip_pages:int
Number of pages that the paginator skips now before fetching the next page.
The parameter is added to the internal state, so repeated calls skip more pages.

Returns
-------
int
The number this paginator is skipping. Internal skip count.

Raises
------
RuntimeError
If the total number of skipped pages is negative.
"""
num_skip_pages = self._num_skip_pages + skip_pages
if self._num_skip_pages < 0:
RuntimeError(f"Invalid number of skipped pages. The total number of pages skipped is negative {num_skip_pages}, requested to skip {skip_pages}.")
self._num_skip_pages = num_skip_pages
return self._num_skip_pages
26 changes: 22 additions & 4 deletions tests/api/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def test_api_search_node_type(cript_api: cript.API) -> None:

# test search results
assert isinstance(materials_paginator, Paginator)
materials_paginator.skip_pages(3)
materials_paginator.limit_page_fetches(3)
materials_list = []
while True:
try:
Expand All @@ -39,12 +41,9 @@ def test_api_search_node_type(cript_api: cript.API) -> None:
materials_paginator.auto_load_nodes = True
except StopIteration:
break
# We don't need to search for a million pages here.
if materials_paginator._number_fetched_pages > 6:
break

# Assure that we paginated more then one page
assert materials_paginator._number_fetched_pages > 0
assert materials_paginator.page_number == 6
assert len(materials_list) > 5
first_page_first_result = materials_list[0].name
# just checking that the word has a few characters in it
Expand Down Expand Up @@ -104,6 +103,25 @@ def test_api_search_uuid(cript_api: cript.API, dynamic_material_data) -> None:
assert str(uuid_list[0].uuid) == dynamic_material_data["uuid"]


@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
def test_empty_paginator(cript_api: cript.API) -> None:
non_existent_name = "This is an nonsensical name for a material and should never exist. %^&*()_"
exact_name_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.EXACT_NAME, value_to_search=non_existent_name)
with pytest.raises(StopIteration):
next(exact_name_paginator)
exact_name_paginator.auto_load_nodes = False
with pytest.raises(StopIteration):
next(exact_name_paginator)

# Special 0 UUID should not exist
uuid_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.UUID, value_to_search="00000000-0000-0000-0000-000000000000")
with pytest.raises(StopIteration):
next(uuid_paginator)
exact_name_paginator.auto_load_nodes = False
with pytest.raises(StopIteration):
next(uuid_paginator)


@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
def test_api_search_bigsmiles(cript_api: cript.API) -> None:
"""
Expand Down
Loading