Fix drift search edge cases over small input sets (#1310)

* Fix edge cases over small input sets
* Ruff
microsoft · Oct 22, 2024 · 77e7777 · 77e7777
1 parent 8d8c67d
commit 77e7777
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 5 deletions.
diff --git a/.semversioner/next-release/patch-20241022210153426558.json b/.semversioner/next-release/patch-20241022210153426558.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "Fix some edge cases on Drift Search over small input sets"
+}
diff --git a/graphrag/query/structured_search/drift_search/action.py b/graphrag/query/structured_search/drift_search/action.py
@@ -72,10 +72,12 @@ async def asearch(self, search_engine: Any, global_query: str, scorer: Any = Non
 
         try:
             response = json.loads(search_result.response)
-        except json.JSONDecodeError as e:
+        except json.JSONDecodeError:
             error_message = "Failed to parse search response"
             log.exception("%s: %s", error_message, search_result.response)
-            raise ValueError(error_message) from e
+            # Do not raise an exception here, as it would propagate up through the other steps.
+            # Instead, fall back to an empty response and let the default score of -inf handle it.
+            response = {}
 
         self.answer = response.pop("response", None)
         self.score = response.pop("score", float("-inf"))

diff --git a/graphrag/query/structured_search/drift_search/search.py b/graphrag/query/structured_search/drift_search/search.py
@@ -129,11 +129,12 @@ def _process_primer_results(
             ])
 
             follow_ups = [fu for i in response for fu in i.get("follow_up_queries", [])]
-            if len(follow_ups) == 0:
+
+            if not follow_ups:
                 error_msg = "No follow-up queries found in primer response. Ensure that the primer response includes follow-up queries."
                 raise RuntimeError(error_msg)
 
-            score = sum(i["score"] for i in response) / len(response)
+            score = sum(i.get("score", float("-inf")) for i in response) / len(response)
             response_data = {
                 "intermediate_answer": intermediate_answer,
                 "follow_up_queries": follow_ups,

diff --git a/graphrag/query/structured_search/drift_search/system_prompt.py b/graphrag/query/structured_search/drift_search/system_prompt.py
@@ -65,7 +65,7 @@
 
 Add sections and commentary to the response as appropriate for the length and format.
 
-Additionally provide a score for how well the response addresses the overall research question: {global_query}. Based on your response, suggest a few follow-up questions that could be asked to further explore the topic. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Generate at least five good follow-up queries. Format your response in JSON with the following keys and values:
+Additionally provide a score between 0 and 100 representing how well the response addresses the overall research question: {global_query}. Based on your response, suggest up to five follow-up questions that could be asked to further explore the topic as it relates to the overall research question. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Format your response in JSON with the following keys and values:
 
 {{'response': str, Put your answer, formatted in markdown, here. Do not answer the global query in this section.
 'score': int,