Skip to content

Commit

Permalink
add some advanced features to the paginator
Browse files Browse the repository at this point in the history
  • Loading branch information
InnocentBug committed Feb 28, 2024
1 parent 7aa8b30 commit b520b7d
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 5 deletions.
69 changes: 68 additions & 1 deletion src/cript/api/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class Paginator:
_current_position: int
_fetched_nodes: list
_number_fetched_pages: int = 0
_limit_page_fetches: Union[int, None] = None
_num_skip_pages: int = 0
auto_load_nodes: bool = True

@beartype
Expand Down Expand Up @@ -103,11 +105,15 @@ def _fetch_next_page(self) -> None:
None
"""

# Check if we are supposed to fetch more pages
if self._limit_page_fetches and self._number_fetched_pages >= self._limit_page_fetches:
raise StopIteration

# Composition of the query URL
temp_url_path: str = self._url_path
temp_url_path += f"/?q={self._query}"
if self._initial_page_number is not None:
temp_url_path += f"&page={self._initial_page_number + self._number_fetched_pages}"
temp_url_path += f"&page={self.page_number}"
self._number_fetched_pages += 1

response: requests.Response = self._api._capsule_request(url_path=temp_url_path, method="GET")
Expand Down Expand Up @@ -174,3 +180,64 @@ def __next__(self):
def __iter__(self):
    """Start a fresh iteration over this paginator.

    Returns
    -------
    self
        The paginator acts as its own iterator.
    """
    # Rewind the read cursor so iteration starts from the first position.
    self._current_position = 0
    return self

@property
def page_number(self) -> Union[int, None]:
    """Report the number of the page this paginator will request next.

    Returns
    -------
    int
        Sum of the skipped pages, the initial page number and the number of
        pages fetched so far.
    None
        If no page number is associated with the pagination.
    """
    first_page = self._initial_page_number
    if first_page is None:
        # Searches without an initial page number have no page to report.
        return None
    return self._num_skip_pages + first_page + self._number_fetched_pages

@beartype
def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None:
    """Cap the number of pages this paginator is allowed to fetch.

    Useful for very large searches: the search can be processed in smaller
    portions instead of in one go.

    Parameters
    ----------
    max_num_pages: Union[int, None]
        Positive integer with the maximum number of page fetches,
        or None to permit an unlimited number of page fetches.
    """
    # Only the limit is recorded here; it is read back elsewhere in the paginator.
    self._limit_page_fetches = max_num_pages

def skip_pages(self, skip_pages: int) -> int:
    """Skip pages in the pagination.

    Warning: this function is advanced usage and may not produce the results you expect.
    In particular, every search is different; even if we search for the same values there is
    no guarantee that the results are in the same order. (And results can change if data is
    added or removed from CRIPT.) So if you break up your search with `limit_page_fetches` and
    `skip_pages` there is no guarantee that it is the same as one continuous search.

    If the paginator's associated search does not accept pages, there is no effect.

    Parameters
    ----------
    skip_pages: int
        Number of pages that the paginator skips now before fetching the next page.
        The parameter is added to the internal state, so repeated calls skip more pages.
        A negative value reduces the number of previously requested skips.

    Returns
    -------
    int
        The number of pages this paginator is skipping (the internal skip count).

    Raises
    ------
    RuntimeError
        If the total number of skipped pages would become negative.
        The internal skip count is left unchanged in that case.
    """
    num_skip_pages = self._num_skip_pages + skip_pages
    # Validate the prospective total (not the current one) before storing it,
    # so a rejected request leaves the paginator state untouched.
    if num_skip_pages < 0:
        raise RuntimeError(f"Invalid number of skipped pages. The total number of pages skipped is negative {num_skip_pages}, requested to skip {skip_pages}.")
    self._num_skip_pages = num_skip_pages
    return self._num_skip_pages
7 changes: 3 additions & 4 deletions tests/api/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def test_api_search_node_type(cript_api: cript.API) -> None:

# test search results
assert isinstance(materials_paginator, Paginator)
materials_paginator.skip_pages(3)
materials_paginator.limit_page_fetches(3)
materials_list = []
while True:
try:
Expand All @@ -39,12 +41,9 @@ def test_api_search_node_type(cript_api: cript.API) -> None:
materials_paginator.auto_load_nodes = True
except StopIteration:
break
# We don't need to search for a million pages here.
if materials_paginator._number_fetched_pages > 6:
break

# Ensure that we paginated more than one page
assert materials_paginator._number_fetched_pages > 0
assert materials_paginator.page_number == 6
assert len(materials_list) > 5
first_page_first_result = materials_list[0].name
# just checking that the word has a few characters in it
Expand Down

0 comments on commit b520b7d

Please sign in to comment.