From 6a0457ae5b1e25bf0fd0bf547da791e575eb4656 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider <ludwigschneider@uchicago.edu>
Date: Tue, 27 Feb 2024 08:25:29 -0600
Subject: [PATCH 1/2] Allow users to handle invalid nodes coming from a search
 against the API

---
 docs/faq.md                | 53 ++++++++++++++++++++++++++++++++++++--
 src/cript/api/api.py       |  2 +-
 src/cript/api/paginator.py | 30 ++++++++++++++++-----
 tests/api/test_search.py   | 17 +++++++++++-
 4 files changed, 91 insertions(+), 11 deletions(-)

diff --git a/docs/faq.md b/docs/faq.md
index 8786dd986..867abc356 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -55,7 +55,7 @@ tab for developer documentation._
 
 **Q:** Is there documentation detailing the internal workings of the code?
 
-**A:** _Absolutely! For an in-depth look at the CRIPT Python SDK code, 
+**A:** _Absolutely! For an in-depth look at the CRIPT Python SDK code,
 consult the [GitHub repository wiki internal documentation](https://github.com/C-Accel-CRIPT/Python-SDK/wiki)._
 
 ---
@@ -84,7 +84,7 @@ A GitHub account is required._
 
 **Q:** Where can I find the release notes for each SDK version?
 
-**A:** _The release notes can be found on our 
+**A:** _The release notes can be found on our
 [CRIPT Python SDK repository releases section](https://github.com/C-Accel-CRIPT/Python-SDK/releases)_
 
 ---
@@ -97,6 +97,55 @@ the code is written to get a better grasp of it?
 There you will find documentation on everything from how our code is structure,
 how we aim to write our documentation, CI/CD, and more._
 
+---
+
+**Q:** What can I do when my `api.search(...)` fails with a `cript.nodes.exception.CRIPTJsonDeserializationError` or a similar error?
+
+**A:** _Sometimes CRIPT contains nodes formatted in a way that the Python SDK does not understand. In that case you can temporarily disable the automatic conversion of the API response into SDK nodes. Here is an example of how to achieve this:_
+```python
+# Create API object in with statement, here it assumes host, token, and storage token are in your environment variables
+with cript.API() as api:
+    # Find the paginator object, which is a python iterator over the search results.
+    materials_paginator = api.search(node_type=cript.Material, search_mode=cript.SearchModes.NODE_TYPE)
+    # Usually you would do
+    # `materials_list = list(materials_paginator)`
+    # or
+    # for node in materials_paginator:
+    #    #do node stuff
+    # But now we want more control over the iteration to ignore failing node decoding.
+    # And store the result in a list of valid nodes
+    materials_list = []
+    # We use a while True loop to iterate over the results
+    while True:
+        # This outer try catches the end of the search results.
+        # The `next()` function raises a StopIteration exception in that case
+        try:
+            # First we try to convert the current response into a node directly
+            try:
+                material_node = next(materials_paginator)
+            # But if that fails, we catch the exception from CRIPT
+            except cript.CRIPTException as exc:
+                # In case of failure, we disable the `auto_load_nodes` flag temporarily
+                materials_paginator.auto_load_nodes = False
+                # And only obtain the unloaded node JSON instead
+                material_json = next(materials_paginator)
+                # Here you can inspect and manually handle the problem.
+                # In the example, we just print it and ignore it otherwise
+                print(exc, material_json)
+            else:
+                # After a valid node is loaded (try block didn't fail)
+                # we store the valid node in the list
+                materials_list.append(material_node)
+            finally:
+                # No matter what happened, for the next iteration we want to try to obtain
+                # an auto loaded node again, so we reset the paginator state.
+                materials_paginator.auto_load_nodes = True
+        except StopIteration:
+            # If next() of the paginator indicates an end of the search results, break the loop
+            break
+```
+
+
 _We try to also have type hinting, comments, and docstrings for all the code that we work on so it is clear and easy for anyone reading it to easily understand._
 
 _if all else fails, contact us on our [GitHub Repository](https://github.com/C-Accel-CRIPT/Python-SDK)._
diff --git a/src/cript/api/api.py b/src/cript/api/api.py
index 980c08876..3081399f8 100644
--- a/src/cript/api/api.py
+++ b/src/cript/api/api.py
@@ -988,7 +988,7 @@ def _capsule_request(self, url_path: str, method: str, api_request: bool = True,
         response: requests.Response = requests.request(url=url, method=method, headers=headers, timeout=timeout, **kwargs)
         post_log_message: str = f"Request return with {response.status_code}"
         if self.extra_api_log_debug_info:
-            post_log_message += f" {response.raw}"
+            post_log_message += f" {response.text}"
         self.logger.debug(post_log_message)
 
         return response
diff --git a/src/cript/api/paginator.py b/src/cript/api/paginator.py
index 929954727..0625138a1 100644
--- a/src/cript/api/paginator.py
+++ b/src/cript/api/paginator.py
@@ -1,4 +1,4 @@
-from json import JSONDecodeError
+import json
 from typing import Dict, Union
 from urllib.parse import quote
 
@@ -33,6 +33,7 @@ class Paginator:
     _current_position: int
     _fetched_nodes: list
     _number_fetched_pages: int = 0
+    auto_load_nodes: bool = True
 
     @beartype
     def __init__(
@@ -119,8 +120,11 @@ def _fetch_next_page(self) -> None:
         # if converting API response to JSON gives an error
         # then there must have been an API error, so raise the requests error
         # this is to avoid bad indirect errors and make the errors more direct for users
-        except JSONDecodeError:
-            response.raise_for_status()
+        except json.JSONDecodeError as json_exc:
+            try:
+                response.raise_for_status()
+            except Exception as exc:
+                raise exc from json_exc
 
         # handling both cases in case there is result inside of data or just data
         try:
@@ -137,8 +141,10 @@ def _fetch_next_page(self) -> None:
         if api_response["code"] != 200:
             raise APIError(api_error=str(response.json()), http_method="GET", api_url=temp_url_path)
 
-        node_list = load_nodes_from_json(current_page_results)
-        self._fetched_nodes += node_list
+        # Here we only load the JSON into the temporary results.
+        # This delays error checking, and allows users to disable auto node conversion
+        json_list = current_page_results
+        self._fetched_nodes += json_list
 
     def __next__(self):
         if self._current_position >= len(self._fetched_nodes):
@@ -147,14 +153,24 @@ def __next__(self):
                 raise StopIteration
             self._fetch_next_page()
 
-        self._current_position += 1
         try:
-            return self._fetched_nodes[self._current_position - 1]
+            next_node_json = self._fetched_nodes[self._current_position]  # position is advanced only after a successful load, so index it directly
         except IndexError:  # This is not a random access iteration.
             # So if fetching a next page wasn't enough to get the index inbound,
             # The iteration stops
             raise StopIteration
 
+        if self.auto_load_nodes:
+            return_data = load_nodes_from_json(next_node_json)
+        else:
+            return_data = next_node_json
+
+        # Advance position last, so if an exception occurs, for example when
+        # node decoding fails, we do not advance, and users can try again without decoding
+        self._current_position += 1
+
+        return return_data
+
     def __iter__(self):
         self._current_position = 0
         return self
diff --git a/tests/api/test_search.py b/tests/api/test_search.py
index 860ab43ea..47d3de802 100644
--- a/tests/api/test_search.py
+++ b/tests/api/test_search.py
@@ -24,7 +24,22 @@ def test_api_search_node_type(cript_api: cript.API) -> None:
 
     # test search results
     assert isinstance(materials_paginator, Paginator)
-    materials_list = list(materials_paginator)
+    materials_list = []
+    while True:
+        try:
+            try:
+                material_node = next(materials_paginator)
+            except cript.CRIPTException as exc:
+                materials_paginator.auto_load_nodes = False
+                material_json = next(materials_paginator)
+                print(exc, material_json)
+            else:
+                materials_list += [material_node]
+            finally:
+                materials_paginator.auto_load_nodes = True
+        except StopIteration:
+            break
+
     # Assure that we paginated more then one page
     assert materials_paginator._current_page_number > 0
     assert len(materials_list) > 5

From e7bfa35570de7b0555e96cd63cc526e08c0a2163 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider <ludwigschneider@uchicago.edu>
Date: Tue, 27 Feb 2024 08:35:55 -0600
Subject: [PATCH 2/2] fix issues and shorten test

---
 tests/api/test_search.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/api/test_search.py b/tests/api/test_search.py
index 47d3de802..7030e434a 100644
--- a/tests/api/test_search.py
+++ b/tests/api/test_search.py
@@ -39,11 +39,14 @@ def test_api_search_node_type(cript_api: cript.API) -> None:
                 materials_paginator.auto_load_nodes = True
         except StopIteration:
             break
+        # We don't need to search for a million pages here.
+        if materials_paginator._number_fetched_pages > 6:
+            break
 
     # Assure that we paginated more then one page
-    assert materials_paginator._current_page_number > 0
+    assert materials_paginator._number_fetched_pages > 0
     assert len(materials_list) > 5
-    first_page_first_result = materials_list[0]["name"]
+    first_page_first_result = materials_list[0].name
     # just checking that the word has a few characters in it
     assert len(first_page_first_result) > 3