From 4b9fdc0dfe885903cf66bd0655522004ed060ff7 Mon Sep 17 00:00:00 2001
From: Josh Bradley
Date: Thu, 22 Aug 2024 12:07:50 -0400
Subject: [PATCH] Add context data to query responses (#1003)

* add context data to query responses

* add semversioner file

* ignore typechecking ruff suggestion
---
 .../patch-20240821235154401001.json |  4 ++
 graphrag/query/api.py               | 37 ++++++++++++-------
 graphrag/query/cli.py               | 12 +++++-
 3 files changed, 37 insertions(+), 16 deletions(-)
 create mode 100644 .semversioner/next-release/patch-20240821235154401001.json

diff --git a/.semversioner/next-release/patch-20240821235154401001.json b/.semversioner/next-release/patch-20240821235154401001.json
new file mode 100644
index 0000000000..b544e564cf
--- /dev/null
+++ b/.semversioner/next-release/patch-20240821235154401001.json
@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Add context data to query API responses."
+}
diff --git a/graphrag/query/api.py b/graphrag/query/api.py
index 050af3b729..788de8c0ca 100644
--- a/graphrag/query/api.py
+++ b/graphrag/query/api.py
@@ -9,9 +9,9 @@

 Contains the following functions:
  - global_search: Perform a global search.
- - global_search_streaming: Perform a global search and stream results via a generator.
+ - global_search_streaming: Perform a global search and stream results back.
  - local_search: Perform a local search.
- - local_search_streaming: Perform a local search and stream results via a generator.
+ - local_search_streaming: Perform a local search and stream results back.

 WARNING: This API is under development and may undergo changes in future releases.
 Backwards compatibility is not guaranteed at this time.
@@ -26,6 +26,7 @@
 from graphrag.config.models.graph_rag_config import GraphRagConfig
 from graphrag.index.progress.types import PrintProgressReporter
 from graphrag.model.entity import Entity
+from graphrag.query.structured_search.base import SearchResult  # noqa: TCH001
 from graphrag.vector_stores.lancedb import LanceDBVectorStore
 from graphrag.vector_stores.typing import VectorStoreFactory, VectorStoreType

@@ -51,8 +52,11 @@ async def global_search(
     community_level: int,
     response_type: str,
     query: str,
-) -> str | dict[str, Any] | list[dict[str, Any]]:
-    """Perform a global search.
+) -> tuple[
+    str | dict[str, Any] | list[dict[str, Any]],
+    str | list[pd.DataFrame] | dict[str, pd.DataFrame],
+]:
+    """Perform a global search and return the context data and response.

     Parameters
     ----------
@@ -80,9 +84,10 @@ async def global_search(
         entities=_entities,
         response_type=response_type,
     )
-    result = await search_engine.asearch(query=query)
-    reporter.success(f"Global Search Response: {result.response}")
-    return result.response
+    result: SearchResult = await search_engine.asearch(query=query)
+    response = result.response
+    context_data = _reformat_context_data(result.context_data)  # type: ignore
+    return response, context_data


 @validate_call(config={"arbitrary_types_allowed": True})
@@ -95,7 +100,7 @@ async def global_search_streaming(
     community_level: int,
     response_type: str,
     query: str,
 ) -> AsyncGenerator:
-    """Perform a global search and return results as a generator.
+    """Perform a global search and return the context data and response via a generator.

     Context data is returned as a dictionary of lists, with one list entry for each record.
@@ -152,8 +157,11 @@ async def local_search(
     community_level: int,
     response_type: str,
     query: str,
-) -> str | dict[str, Any] | list[dict[str, Any]]:
-    """Perform a local search.
+) -> tuple[
+    str | dict[str, Any] | list[dict[str, Any]],
+    str | list[pd.DataFrame] | dict[str, pd.DataFrame],
+]:
+    """Perform a local search and return the context data and response.

     Parameters
     ----------
@@ -203,9 +211,10 @@ async def local_search(
         response_type=response_type,
     )

-    result = await search_engine.asearch(query=query)
-    reporter.success(f"Local Search Response: {result.response}")
-    return result.response
+    result: SearchResult = await search_engine.asearch(query=query)
+    response = result.response
+    context_data = _reformat_context_data(result.context_data)  # type: ignore
+    return response, context_data


 @validate_call(config={"arbitrary_types_allowed": True})
@@ -221,7 +230,7 @@ async def local_search_streaming(
     community_level: int,
     response_type: str,
     query: str,
 ) -> AsyncGenerator:
-    """Perform a local search and return results as a generator.
+    """Perform a local search and return the context data and response via a generator.

     Parameters
     ----------
diff --git a/graphrag/query/cli.py b/graphrag/query/cli.py
index 59eb4b454a..de625e912a 100644
--- a/graphrag/query/cli.py
+++ b/graphrag/query/cli.py
@@ -78,7 +78,7 @@ async def run_streaming_search():
         return asyncio.run(run_streaming_search())

     # not streaming
-    return asyncio.run(
+    response, context_data = asyncio.run(
         api.global_search(
             config=config,
             nodes=final_nodes,
@@ -89,6 +89,10 @@ async def run_streaming_search():
             query=query,
         )
     )
+    reporter.success(f"Global Search Response:\n{response}")
+    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
+    # External users should use the API directly to get the response and context data.
+    return response, context_data


 def run_local_search(
@@ -156,7 +160,7 @@ async def run_streaming_search():
         return asyncio.run(run_streaming_search())

     # not streaming
-    return asyncio.run(
+    response, context_data = asyncio.run(
         api.local_search(
             config=config,
             nodes=final_nodes,
@@ -170,6 +174,10 @@ async def run_streaming_search():
             query=query,
         )
     )
+    reporter.success(f"Local Search Response:\n{response}")
+    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
+    # External users should use the API directly to get the response and context data.
+    return response, context_data


 def _configure_paths_and_settings(
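
Usage note (not part of the patch): after this change, api.global_search and api.local_search return a (response, context_data) tuple instead of the bare response, so external callers unpack two values, as the NOTE comments in cli.py recommend. The sketch below shows one way a standalone script might call the updated global search API; the parquet paths, the create_graphrag_config loader, the community_reports argument, and the example parameter values are assumptions for illustration and are not shown in this diff.

import asyncio

import pandas as pd

from graphrag.config import create_graphrag_config
from graphrag.query import api


def main() -> None:
    # Load the indexing outputs the query engine needs (paths are illustrative).
    nodes = pd.read_parquet("output/create_final_nodes.parquet")
    entities = pd.read_parquet("output/create_final_entities.parquet")
    community_reports = pd.read_parquet("output/create_final_community_reports.parquet")

    # Build a GraphRagConfig; here from a minimal in-memory settings dict.
    config = create_graphrag_config({"llm": {"api_key": "<API_KEY>"}}, ".")

    # The query API functions are async and now return (response, context_data)
    # rather than the response alone.
    response, context_data = asyncio.run(
        api.global_search(
            config=config,
            nodes=nodes,
            entities=entities,
            community_reports=community_reports,  # assumed argument, not visible in this diff
            community_level=2,
            response_type="Multiple Paragraphs",
            query="What are the top themes in this dataset?",
        )
    )

    print(response)
    # Per the new return annotation, context_data is typically a dict mapping a
    # context record type (e.g. "reports") to a pandas DataFrame.
    if isinstance(context_data, dict):
        for name, df in context_data.items():
            print(name, df.shape)


if __name__ == "__main__":
    main()

api.local_search is consumed the same way; it simply needs the additional local-search inputs (relationships, text units, and so on) loaded as DataFrames from the same output folder.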