From 77e77775ad2483248938f42772b2405be2ed21dd Mon Sep 17 00:00:00 2001 From: Alonso Guevara Date: Tue, 22 Oct 2024 16:24:41 -0600 Subject: [PATCH] Fix drift search edge cases over small input sets (#1310) * Fix edge cases over small input sets * Ruff --- .semversioner/next-release/patch-20241022210153426558.json | 4 ++++ graphrag/query/structured_search/drift_search/action.py | 6 ++++-- graphrag/query/structured_search/drift_search/search.py | 5 +++-- .../query/structured_search/drift_search/system_prompt.py | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 .semversioner/next-release/patch-20241022210153426558.json diff --git a/.semversioner/next-release/patch-20241022210153426558.json b/.semversioner/next-release/patch-20241022210153426558.json new file mode 100644 index 0000000000..dc27bc2f77 --- /dev/null +++ b/.semversioner/next-release/patch-20241022210153426558.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Fix some edge cases on Drift Search over small input sets" +} diff --git a/graphrag/query/structured_search/drift_search/action.py b/graphrag/query/structured_search/drift_search/action.py index 1e90a9a18d..6c6405174c 100644 --- a/graphrag/query/structured_search/drift_search/action.py +++ b/graphrag/query/structured_search/drift_search/action.py @@ -72,10 +72,12 @@ async def asearch(self, search_engine: Any, global_query: str, scorer: Any = Non try: response = json.loads(search_result.response) - except json.JSONDecodeError as e: + except json.JSONDecodeError: error_message = "Failed to parse search response" log.exception("%s: %s", error_message, search_result.response) - raise ValueError(error_message) from e + # Do not raise an exception here, as it would roll up with the other steps. + # Instead, return an empty response and let the -inf score handle it. + response = {} self.answer = response.pop("response", None) self.score = response.pop("score", float("-inf")) diff --git a/graphrag/query/structured_search/drift_search/search.py
b/graphrag/query/structured_search/drift_search/search.py index 947cb726e6..8d8942b526 100644 --- a/graphrag/query/structured_search/drift_search/search.py +++ b/graphrag/query/structured_search/drift_search/search.py @@ -129,11 +129,12 @@ def _process_primer_results( ]) follow_ups = [fu for i in response for fu in i.get("follow_up_queries", [])] - if len(follow_ups) == 0: + + if not follow_ups: error_msg = "No follow-up queries found in primer response. Ensure that the primer response includes follow-up queries." raise RuntimeError(error_msg) - score = sum(i["score"] for i in response) / len(response) + score = sum(i.get("score", float("-inf")) for i in response) / len(response) response_data = { "intermediate_answer": intermediate_answer, "follow_up_queries": follow_ups, diff --git a/graphrag/query/structured_search/drift_search/system_prompt.py b/graphrag/query/structured_search/drift_search/system_prompt.py index eb0e07c262..9d0097ec78 100644 --- a/graphrag/query/structured_search/drift_search/system_prompt.py +++ b/graphrag/query/structured_search/drift_search/system_prompt.py @@ -65,7 +65,7 @@ Add sections and commentary to the response as appropriate for the length and format. -Additionally provide a score for how well the response addresses the overall research question: {global_query}. Based on your response, suggest a few follow-up questions that could be asked to further explore the topic. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Generate at least five good follow-up queries. Format your response in JSON with the following keys and values: +Additionally provide a score between 0 and 100 representing how well the response addresses the overall research question: {global_query}. 
Based on your response, suggest up to five follow-up questions that could be asked to further explore the topic as it relates to the overall research question. Do not include scores or follow up questions in the 'response' field of the JSON, add them to the respective 'score' and 'follow_up_queries' keys of the JSON output. Format your response in JSON with the following keys and values: {{'response': str, Put your answer, formatted in markdown, here. Do not answer the global query in this section. 'score': int,