diff --git a/src/cript/api/api.py b/src/cript/api/api.py
index 3081399f8..14f864bb9 100644
--- a/src/cript/api/api.py
+++ b/src/cript/api/api.py
@@ -59,10 +59,10 @@ class API:
     _host: str = ""
     _api_token: str = ""
     _storage_token: str = ""
-    _http_headers: dict = {}
     _db_schema: Optional[DataSchema] = None
     _api_prefix: str = "api"
     _api_version: str = "v1"
+    _api_request_session: Union[None, requests.Session] = None

     # trunk-ignore-begin(cspell)
     # AWS S3 constants
@@ -213,9 +213,6 @@ def __init__(self, host: Union[str, None] = None, api_token: Union[str, None] =
         self._api_token = api_token  # type: ignore
         self._storage_token = storage_token  # type: ignore

-        # add Bearer to token for HTTP requests
-        self._http_headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"}
-
         # set a logger instance to use for the class logs
         self._init_logger(default_log_level)

@@ -322,6 +319,14 @@ def connect(self):
         CRIPTConnectionError
             raised when the host does not give the expected response
         """
+
+        # Establish a requests session object
+        if self._api_request_session:
+            self.disconnect()
+        self._api_request_session = requests.Session()
+        # add Bearer to token for HTTP requests
+        self._api_request_session.headers = {"Authorization": f"Bearer {self._api_token}", "Content-Type": "application/json"}
+
         # As a form to check our connection, we pull and establish the data schema
         try:
             self._db_schema = DataSchema(self)
@@ -344,6 +349,10 @@ def disconnect(self):

         For manual connection: nested API object are discouraged.
         """
+        # Disconnect request session
+        if self._api_request_session:
+            self._api_request_session.close()
+
         # Restore the previously active global API (might be None)
         global _global_cached_api
         _global_cached_api = self._previous_global_cached_api
@@ -946,7 +955,7 @@ def delete_node_by_uuid(self, node_type: str, node_uuid: str) -> None:

         self.logger.info(f"Deleted '{node_type.title()}' with UUID of '{node_uuid}' from CRIPT API.")

-    def _capsule_request(self, url_path: str, method: str, api_request: bool = True, headers: Optional[Dict] = None, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response:
+    def _capsule_request(self, url_path: str, method: str, api_request: bool = True, timeout: int = _API_TIMEOUT, **kwargs) -> requests.Response:
         """Helper function that capsules every request call we make against the backend.

         Please *always* use this methods instead of `requests` directly.
@@ -971,9 +980,6 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True,
             additional keyword arguments that are passed to `request.request`
         """

-        if headers is None:
-            headers = self._http_headers
-
         url: str = self.host
         if api_request:
             url += f"/{self.api_prefix}/{self.api_version}"
@@ -985,7 +991,9 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True,
             pre_log_message += "..."
         self.logger.debug(pre_log_message)

-        response: requests.Response = requests.request(url=url, method=method, headers=headers, timeout=timeout, **kwargs)
+        if self._api_request_session is None:
+            raise CRIPTAPIRequiredError
+        response: requests.Response = self._api_request_session.request(url=url, method=method, timeout=timeout, **kwargs)
         post_log_message: str = f"Request return with {response.status_code}"
         if self.extra_api_log_debug_info:
             post_log_message += f" {response.text}"
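The `api.py` changes above replace per-call `requests.request(...)` invocations, with headers rebuilt for every request, by one long-lived `requests.Session` that is opened in `connect()`, reused by `_capsule_request()`, and closed in `disconnect()`. Below is a minimal sketch of that pattern in plain `requests`, independent of the SDK; the host URL and token are placeholders.

```python
import requests

# One session per connection: default headers (Bearer token) are set once and
# the underlying connection pool is reused for every request.
session = requests.Session()
session.headers = {"Authorization": "Bearer <api_token>", "Content-Type": "application/json"}

# Equivalent of _capsule_request(): every backend call goes through the same session.
response = session.request(method="GET", url="https://example-host/api/v1/search/", timeout=30)
print(response.status_code)

# Equivalent of disconnect(): release the pooled connections.
session.close()
```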
diff --git a/src/cript/api/exceptions.py b/src/cript/api/exceptions.py
index b303903a9..2d1f326a0 100644
--- a/src/cript/api/exceptions.py
+++ b/src/cript/api/exceptions.py
@@ -68,6 +68,8 @@ class CRIPTAPIRequiredError(CRIPTException):
     """
     ## Definition
     Exception to be raised when the API object is requested, but no cript.API object exists yet.
+    Also make sure to use the cript.API object in a context manager (`with cript.API() as api:`) or
+    to call `connect` and `disconnect` manually.

     The CRIPT Python SDK relies on a cript.API object for creation, validation, and modification of nodes.
     The cript.API object may be explicitly called by the user to perform operations to the API, or
@@ -83,6 +85,9 @@ class CRIPTAPIRequiredError(CRIPTException):
     my_token = "123456"  # To use your token securely, please consider using environment variables

     my_api = cript.API(host=my_host, token=my_token)
+    my_api.connect()
+    # Your code
+    my_api.disconnect()
     ```
     """

@@ -90,7 +95,11 @@ def __init__(self):
         pass

     def __str__(self) -> str:
-        error_message = "cript.API object is required for an operation, but it does not exist." "Please instantiate a cript.API object to continue." "See the documentation for more details."
+        error_message = (
+            "cript.API object is required for an operation, but it does not exist. "
+            "Please instantiate a cript.API object and connect it to the API, for example with a context manager `with cript.API() as api:`, to continue. "
+            "See the documentation for more details."
+        )

         return error_message
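With the session now created in `connect()`, a cript.API object has to be connected before any request is made; otherwise `_capsule_request()` raises `CRIPTAPIRequiredError`. The sketch below shows the two usage patterns the updated docstrings describe. The host and tokens are placeholders, and the keyword names are taken from the `__init__` assignments visible above; treat them as assumptions rather than the definitive constructor signature.

```python
import cript

my_host = "https://api.example.org/"  # placeholder
my_api_token = "123456"               # placeholder; prefer environment variables
my_storage_token = "987654"           # placeholder

# Preferred: the context manager connects on entry and disconnects on exit.
with cript.API(host=my_host, api_token=my_api_token, storage_token=my_storage_token) as api:
    ...  # searches, node creation, etc.

# Manual alternative: connect() opens the request session, disconnect() closes it.
my_api = cript.API(host=my_host, api_token=my_api_token, storage_token=my_storage_token)
my_api.connect()
try:
    ...  # Your code
finally:
    my_api.disconnect()
```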
diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py
index 0625138a1..23e419ed0 100644
--- a/src/cript/api/paginator.py
+++ b/src/cript/api/paginator.py
@@ -33,6 +33,8 @@ class Paginator:
     _current_position: int
     _fetched_nodes: list
     _number_fetched_pages: int = 0
+    _limit_page_fetches: Union[int, None] = None
+    _num_skip_pages: int = 0
     auto_load_nodes: bool = True

     @beartype
@@ -103,11 +105,15 @@ def _fetch_next_page(self) -> None:
         None
         """
+        # Check if we are supposed to fetch more pages
+        if self._limit_page_fetches and self._number_fetched_pages >= self._limit_page_fetches:
+            raise StopIteration
+
         # Composition of the query URL
         temp_url_path: str = self._url_path
         temp_url_path += f"/?q={self._query}"
         if self._initial_page_number is not None:
-            temp_url_path += f"&page={self._initial_page_number + self._number_fetched_pages}"
+            temp_url_path += f"&page={self.page_number}"
         self._number_fetched_pages += 1

         response: requests.Response = self._api._capsule_request(url_path=temp_url_path, method="GET")
@@ -136,9 +142,9 @@ def _fetch_next_page(self) -> None:
         if api_response["code"] == 404 and api_response["error"] == "The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.":
             current_page_results = []
-
+            self._api.logger.debug(f"The paginator hit a 404 HTTP for requesting this {temp_url_path} with GET. We interpret it as no nodes present, but this is brittle at the moment.")
         # if API response is not 200 raise error for the user to debug
-        if api_response["code"] != 200:
+        elif api_response["code"] != 200:
             raise APIError(api_error=str(response.json()), http_method="GET", api_url=temp_url_path)

         # Here we only load the JSON into the temporary results.
@@ -174,3 +180,66 @@ def __next__(self):
     def __iter__(self):
         self._current_position = 0
         return self
+
+    @property
+    def page_number(self) -> Union[int, None]:
+        """Obtain the page number that the paginator will fetch next.
+
+        Returns
+        -------
+        int
+            positive number of the next page this paginator is fetching.
+        None
+            if no page number is associated with the pagination
+        """
+        page_number = self._num_skip_pages + self._number_fetched_pages
+        if self._initial_page_number is not None:
+            page_number += self._initial_page_number
+        return page_number
+
+    @beartype
+    def limit_page_fetches(self, max_num_pages: Union[int, None]) -> None:
+        """Limit pagination to a maximum number of pages.
+
+        This can be used for very large searches with the paginator, so the search can be split into
+        smaller portions.
+
+        Parameters
+        ----------
+        max_num_pages: Union[int, None]
+            positive integer with the maximum number of page fetches,
+            or None, indicating that an unlimited number of page fetches is permitted.
+        """
+        self._limit_page_fetches = max_num_pages
+
+    def skip_pages(self, skip_pages: int) -> int:
+        """Skip pages in the pagination.
+
+        Warning: this function is for advanced usage and may not produce the results you expect.
+        In particular, every search is different; even if we search for the same values, there is
+        no guarantee that the results are in the same order. (And results can change if data is
+        added or removed from CRIPT.) So if you break up your search with `limit_page_fetches` and
+        `skip_pages`, there is no guarantee that it is the same as one continuous search.
+        If the paginator's associated search does not accept pages, this has no effect.
+
+        Parameters
+        ----------
+        skip_pages: int
+            Number of pages that the paginator skips now before fetching the next page.
+            The parameter is added to the internal state, so repeated calls skip more pages.
+
+        Returns
+        -------
+        int
+            The total number of pages this paginator is skipping. Internal skip count.
+
+        Raises
+        ------
+        RuntimeError
+            If the total number of skipped pages is negative.
+        """
+        num_skip_pages = self._num_skip_pages + skip_pages
+        if num_skip_pages < 0:
+            raise RuntimeError(f"Invalid number of skipped pages. The total number of pages skipped is negative {num_skip_pages}, requested to skip {skip_pages}.")
+        self._num_skip_pages = num_skip_pages
+        return self._num_skip_pages
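Together, `skip_pages()`, `limit_page_fetches()`, and the `page_number` property let a very large search be consumed in bounded chunks. The sketch below mirrors the updated test that follows; the `search()` arguments are assumptions (only the paginator calls come from this diff), and `api` is assumed to be a connected cript.API object.

```python
# Assumed search call; the exact node type and search mode are not part of this diff.
materials_paginator = api.search(node_type=cript.Material, search_mode=cript.SearchModes.NODE_TYPE, value_to_search=None)

# Skip the first three pages, then fetch at most three pages in this run.
materials_paginator.skip_pages(3)
materials_paginator.limit_page_fetches(3)

chunk = []
while True:
    try:
        chunk.append(next(materials_paginator))
    except StopIteration:
        # Raised either by an exhausted search or by hitting the page-fetch limit.
        break

# page_number reports the next page to fetch:
# initial page + skipped pages + fetched pages (6 in the updated test).
print(materials_paginator.page_number, len(chunk))
```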
diff --git a/tests/api/test_search.py b/tests/api/test_search.py
index 7030e434a..d09c08bfb 100644
--- a/tests/api/test_search.py
+++ b/tests/api/test_search.py
@@ -24,6 +24,8 @@ def test_api_search_node_type(cript_api: cript.API) -> None:

     # test search results
     assert isinstance(materials_paginator, Paginator)
+    materials_paginator.skip_pages(3)
+    materials_paginator.limit_page_fetches(3)
     materials_list = []

     while True:
         try:
@@ -39,12 +41,9 @@ def test_api_search_node_type(cript_api: cript.API) -> None:
             materials_paginator.auto_load_nodes = True
         except StopIteration:
             break
-        # We don't need to search for a million pages here.
-        if materials_paginator._number_fetched_pages > 6:
-            break

     # Assure that we paginated more than one page
-    assert materials_paginator._number_fetched_pages > 0
+    assert materials_paginator.page_number == 6
     assert len(materials_list) > 5
     first_page_first_result = materials_list[0].name
     # just checking that the word has a few characters in it
@@ -104,6 +103,25 @@ def test_api_search_uuid(cript_api: cript.API, dynamic_material_data) -> None:

     assert str(uuid_list[0].uuid) == dynamic_material_data["uuid"]


+@pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
+def test_empty_paginator(cript_api: cript.API) -> None:
+    non_existent_name = "This is a nonsensical name for a material and should never exist. %^&*()_"
+    exact_name_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.EXACT_NAME, value_to_search=non_existent_name)
+    with pytest.raises(StopIteration):
+        next(exact_name_paginator)
+    exact_name_paginator.auto_load_nodes = False
+    with pytest.raises(StopIteration):
+        next(exact_name_paginator)
+
+    # Special 0 UUID should not exist
+    uuid_paginator = cript_api.search(node_type=cript.Material, search_mode=cript.SearchModes.UUID, value_to_search="00000000-0000-0000-0000-000000000000")
+    with pytest.raises(StopIteration):
+        next(uuid_paginator)
+    uuid_paginator.auto_load_nodes = False
+    with pytest.raises(StopIteration):
+        next(uuid_paginator)
+
+
 @pytest.mark.skipif(not HAS_INTEGRATION_TESTS_ENABLED, reason="requires a real cript_api_token")
 def test_api_search_bigsmiles(cript_api: cript.API) -> None:
     """
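The drain loop in `test_api_search_node_type` above (call `next()`, stop on `StopIteration`) is the generic way to consume a limited paginator. A small, purely illustrative helper along those lines is sketched below; it assumes a freshly created paginator, since `limit_page_fetches` caps the total number of fetched pages rather than the number fetched per call, and the helper name is hypothetical.

```python
from typing import Any, List


def collect_pages(paginator, num_pages: int, skip: int = 0) -> List[Any]:
    """Collect all nodes from up to `num_pages` pages, optionally skipping `skip` pages first."""
    if skip:
        paginator.skip_pages(skip)
    paginator.limit_page_fetches(num_pages)

    results: List[Any] = []
    while True:
        try:
            results.append(next(paginator))
        except StopIteration:
            # Search exhausted or page-fetch limit reached.
            break
    return results
```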