From d033d886d5f897ac73a1d54ff364d016716ea3b3 Mon Sep 17 00:00:00 2001 From: KylinMountain Date: Wed, 10 Jul 2024 12:52:29 +0800 Subject: [PATCH 1/3] update map prompt of global query --- .../global_search/map_system_prompt.py | 71 +++++++------------ .../structured_search/global_search/search.py | 10 +-- 2 files changed, 32 insertions(+), 49 deletions(-) diff --git a/graphrag/query/structured_search/global_search/map_system_prompt.py b/graphrag/query/structured_search/global_search/map_system_prompt.py index db1a649df3..81e4afc6a8 100644 --- a/graphrag/query/structured_search/global_search/map_system_prompt.py +++ b/graphrag/query/structured_search/global_search/map_system_prompt.py @@ -4,15 +4,12 @@ """System prompts for global search.""" MAP_SYSTEM_PROMPT = """ ----Role--- - You are a helpful assistant responding to questions about data in the tables provided. +""" - ----Goal--- - +MAP_USER_PROMPT = """ +=============== Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables. - You should use the data provided in the data tables below as the primary context for generating the response. If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up. @@ -20,7 +17,7 @@ - Description: A comprehensive description of the point. - Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0. -The response should be JSON formatted as follows: +The response MUST be JSON formatted as follows: {{ "points": [ {{"description": "Description of point 1 [Data: Reports (report ids)]", "score": score_value}}, @@ -28,35 +25,6 @@ ] }} -The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will". 
- -Points supported by data should list the relevant reports as references as follows: -"This is an example sentence supported by data references [Data: Reports (report ids)]" - -**Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. - -For example: -"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. He is also CEO of company X [Data: Reports (1, 3)]" - -where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables. - -Do not include information where the supporting evidence for it is not provided. - - ----Data tables--- - -{context_data} - ----Goal--- - -Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables. - -You should use the data provided in the data tables below as the primary context for generating the response. -If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up. - -Each key point in the response should have the following element: -- Description: A comprehensive description of the point. -- Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0. The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will". @@ -65,18 +33,31 @@ **Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. +=============== For example: -"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. 
He is also CEO of company X [Data: Reports (1, 3)]" - -where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables. - -Do not include information where the supporting evidence for it is not provided. - -The response should be JSON formatted as follows: +user question: Is Person X currently under investigation for alleged illegal activities or unethical behavior? +---Data Tables--- +| id | title | occurrence weight | content | rank | +|----|--------------------------------------|-------------------|---------|------| +| 1 | Allegations against Person X | 1 | Allegations of financial misconduct | 4.0 | +| 2 | Allegations against Person X | 0.3 | Allegations of unethical business practices | 4.0 | +| 3 | Allegations against Person X | 0.8 | Allegations of workplace harassment | 3.0 | +| 4 | CEO of company X | 1 | Person X is CEO of Company X | 3.0 | +| 5 | owner of company Y | 1 | Person X is the owner of Company Y | 3.0 | + +output: {{ "points": [ - {{"description": "Description of point 1 [Data: Reports (report ids)]", "score": score_value}}, - {{"description": "Description of point 2 [Data: Reports (report ids)]", "score": score_value}} + {{"description": "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1, 2, 3, 5)].", "score": 85}}, + {{"description": "He is also CEO of company X [Data: Reports (4)]", "score": 75}} ] }} + +============== +user question: {user_question} + +---Data tables--- +{context_data} + +output: """ diff --git a/graphrag/query/structured_search/global_search/search.py b/graphrag/query/structured_search/global_search/search.py index 3b52ecbd8c..c65ee577a5 100644 --- a/graphrag/query/structured_search/global_search/search.py +++ b/graphrag/query/structured_search/global_search/search.py @@ -25,7 +25,7 @@ GlobalSearchLLMCallback, ) from graphrag.query.structured_search.global_search.map_system_prompt import ( - MAP_SYSTEM_PROMPT, + 
MAP_SYSTEM_PROMPT, MAP_USER_PROMPT, ) from graphrag.query.structured_search.global_search.reduce_system_prompt import ( GENERAL_KNOWLEDGE_INSTRUCTION, @@ -64,6 +64,7 @@ def __init__( context_builder: GlobalContextBuilder, token_encoder: tiktoken.Encoding | None = None, map_system_prompt: str = MAP_SYSTEM_PROMPT, + map_user_prompt: str = MAP_USER_PROMPT, reduce_system_prompt: str = REDUCE_SYSTEM_PROMPT, response_type: str = "multiple paragraphs", allow_general_knowledge: bool = False, @@ -83,6 +84,7 @@ def __init__( context_builder_params=context_builder_params, ) self.map_system_prompt = map_system_prompt + self.map_user_prompt = map_user_prompt self.reduce_system_prompt = reduce_system_prompt self.response_type = response_type self.allow_general_knowledge = allow_general_knowledge @@ -173,10 +175,10 @@ async def _map_response_single_batch( start_time = time.time() search_prompt = "" try: - search_prompt = self.map_system_prompt.format(context_data=context_data) + search_prompt = self.map_user_prompt.format(context_data=context_data, user_question=query) search_messages = [ - {"role": "system", "content": search_prompt}, - {"role": "user", "content": query}, + {"role": "system", "content": self.map_system_prompt}, + {"role": "user", "content": search_prompt}, ] async with self.semaphore: search_response = await self.llm.agenerate( From cabf14fe7ddd264821fc7f0dfc628178a6764a64 Mon Sep 17 00:00:00 2001 From: KylinMountain Date: Wed, 10 Jul 2024 12:55:28 +0800 Subject: [PATCH 2/3] add semversioner --- .semversioner/next-release/patch-20240710045321911472.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .semversioner/next-release/patch-20240710045321911472.json diff --git a/.semversioner/next-release/patch-20240710045321911472.json b/.semversioner/next-release/patch-20240710045321911472.json new file mode 100644 index 0000000000..d0b1295a8f --- /dev/null +++ b/.semversioner/next-release/patch-20240710045321911472.json @@ -0,0 +1,4 @@ +{ + "type": 
"patch", + "description": "update map prompt of global query" +} From 4f69839606cfaca782d537396c381087e36bfb4f Mon Sep 17 00:00:00 2001 From: KylinMountain Date: Thu, 11 Jul 2024 11:43:28 +0800 Subject: [PATCH 3/3] update prompt --- .../structured_search/global_search/map_system_prompt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphrag/query/structured_search/global_search/map_system_prompt.py b/graphrag/query/structured_search/global_search/map_system_prompt.py index 81e4afc6a8..0002b340f3 100644 --- a/graphrag/query/structured_search/global_search/map_system_prompt.py +++ b/graphrag/query/structured_search/global_search/map_system_prompt.py @@ -45,7 +45,7 @@ | 4 | CEO of company X | 1 | Person X is CEO of Company X | 3.0 | | 5 | owner of company Y | 1 | Person X is the owner of Company Y | 3.0 | -output: +answer: {{ "points": [ {{"description": "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1, 2, 3, 5)].", "score": 85}}, @@ -59,5 +59,5 @@ ---Data tables--- {context_data} -output: +answer: """