updates

onyx-dot-app · Dec 15, 2024 · 0c13c9a · 0c13c9a
1 parent 24525ca
commit 0c13c9a
Show file tree

Hide file tree

Showing 10 changed files with 39 additions and 39 deletions.
diff --git a/backend/danswer/agent_search/answer_query/nodes/answer_generation.py b/backend/danswer/agent_search/answer_query/nodes/answer_generation.py
@@ -9,7 +9,7 @@
 
 def answer_generation(state: AnswerQueryState) -> QAGenerationOutput:
     query = state["query_to_answer"]
-    docs = state["documents"]
+    docs = state["reordered_documents"]
 
     print(f"Number of verified retrieval docs: {len(docs)}")
 

diff --git a/backend/danswer/agent_search/answer_query/nodes/format_answer.py b/backend/danswer/agent_search/answer_query/nodes/format_answer.py
@@ -10,7 +10,7 @@ def format_answer(state: AnswerQueryState) -> AnswerQueryOutput:
                 query=state["query_to_answer"],
                 quality=state["answer_quality"],
                 answer=state["answer"],
-                documents=state["documents"],
+                documents=state["reordered_documents"],
             )
         ],
     )
diff --git a/backend/danswer/agent_search/answer_query/states.py b/backend/danswer/agent_search/answer_query/states.py
@@ -24,7 +24,7 @@ class QAGenerationOutput(TypedDict, total=False):
 
 
 class ExpandedRetrievalOutput(TypedDict):
-    documents: Annotated[list[InferenceSection], dedup_inference_sections]
+    reordered_documents: Annotated[list[InferenceSection], dedup_inference_sections]
 
 
 class AnswerQueryState(

diff --git a/backend/danswer/agent_search/expanded_retrieval/edges.py b/backend/danswer/agent_search/expanded_retrieval/edges.py
@@ -6,7 +6,6 @@
 
 from danswer.agent_search.expanded_retrieval.nodes.doc_retrieval import RetrieveInput
 from danswer.agent_search.expanded_retrieval.states import ExpandedRetrievalInput
-from danswer.agent_search.expanded_retrieval.states import ExpandedRetrievalState
 from danswer.agent_search.shared_graph_utils.prompts import REWRITE_PROMPT_MULTI
 from danswer.llm.interfaces import LLM
 
@@ -43,8 +42,3 @@ def parallel_retrieval_edge(state: ExpandedRetrievalInput) -> list[Send | Hashab
         )
         for query in rewritten_queries
     ]
-
-
-def conditionally_rerank_edge(state: ExpandedRetrievalState) -> bool:
-    print(f"conditionally_rerank_edge state: {state.keys()}")
-    return bool(state["search_request"].rerank_settings)
diff --git a/backend/danswer/agent_search/expanded_retrieval/graph_builder.py b/backend/danswer/agent_search/expanded_retrieval/graph_builder.py
@@ -2,7 +2,6 @@
 from langgraph.graph import START
 from langgraph.graph import StateGraph
 
-from danswer.agent_search.expanded_retrieval.edges import conditionally_rerank_edge
 from danswer.agent_search.expanded_retrieval.edges import parallel_retrieval_edge
 from danswer.agent_search.expanded_retrieval.nodes.doc_reranking import doc_reranking
 from danswer.agent_search.expanded_retrieval.nodes.doc_retrieval import doc_retrieval
@@ -54,13 +53,9 @@ def expanded_retrieval_graph_builder() -> StateGraph:
         start_key="doc_retrieval",
         end_key="verification_kickoff",
     )
-    graph.add_conditional_edges(
-        source="doc_verification",
-        path=conditionally_rerank_edge,
-        path_map={
-            True: "doc_reranking",
-            False: END,
-        },
+    graph.add_edge(
+        start_key="doc_verification",
+        end_key="doc_reranking",
     )
     graph.add_edge(
         start_key="doc_reranking",

diff --git a/backend/danswer/agent_search/expanded_retrieval/states.py b/backend/danswer/agent_search/expanded_retrieval/states.py
@@ -33,4 +33,4 @@ class ExpandedRetrievalInput(PrimaryState, total=True):
 
 
 class ExpandedRetrievalOutput(TypedDict):
-    documents: Annotated[list[InferenceSection], dedup_inference_sections]
+    reordered_documents: Annotated[list[InferenceSection], dedup_inference_sections]
diff --git a/backend/danswer/agent_search/main/graph_builder.py b/backend/danswer/agent_search/main/graph_builder.py
@@ -47,10 +47,6 @@ def main_graph_builder() -> StateGraph:
         start_key=START,
         end_key="expanded_retrieval",
     )
-    graph.add_edge(
-        start_key="expanded_retrieval",
-        end_key="generate_initial_answer",
-    )
 
     graph.add_edge(
         start_key=START,
@@ -62,7 +58,7 @@ def main_graph_builder() -> StateGraph:
         path_map=["answer_query"],
     )
     graph.add_edge(
-        start_key="answer_query",
+        start_key=["answer_query", "expanded_retrieval"],
         end_key="generate_initial_answer",
     )
     graph.add_edge(
@@ -82,7 +78,7 @@ def main_graph_builder() -> StateGraph:
     compiled_graph = graph.compile()
     primary_llm, fast_llm = get_default_llms()
     search_request = SearchRequest(
-        query="Who made Excel and what other products did they make?",
+        query="If i am familiar with the function that I need, how can I type it into a cell?",
     )
     with get_session_context_manager() as db_session:
         inputs = MainInput(
@@ -91,9 +87,12 @@ def main_graph_builder() -> StateGraph:
             fast_llm=fast_llm,
             db_session=db_session,
         )
-        output = compiled_graph.invoke(
+        for thing in compiled_graph.stream(
             input=inputs,
+            # stream_mode="debug",
             # debug=True,
-            # subgraphs=True,
-        )
-        print(output)
+            subgraphs=True,
+        ):
+            # print(thing)
+            print()
+            print()
diff --git a/backend/danswer/agent_search/main/nodes/generate_initial_answer.py b/backend/danswer/agent_search/main/nodes/generate_initial_answer.py
@@ -2,7 +2,7 @@
 
 from danswer.agent_search.main.states import InitialAnswerOutput
 from danswer.agent_search.main.states import MainState
-from danswer.agent_search.primary_graph.prompts import INITIAL_RAG_PROMPT
+from danswer.agent_search.shared_graph_utils.prompts import INITIAL_RAG_PROMPT
 from danswer.agent_search.shared_graph_utils.utils import format_docs
 
 
@@ -21,7 +21,7 @@ def generate_initial_answer(state: MainState) -> InitialAnswerOutput:
     """
     for decomp_answer_result in decomp_answer_results:
         if (
-            decomp_answer_result.quality == "yes"
+            decomp_answer_result.quality.lower() == "yes"
             and len(decomp_answer_result.answer) > 0
             and decomp_answer_result.answer != "I don't know"
         ):
@@ -47,5 +47,7 @@ def generate_initial_answer(state: MainState) -> InitialAnswerOutput:
     # Grader
     model = state["fast_llm"]
     response = model.invoke(msg)
+    answer = response.pretty_repr()
 
-    return InitialAnswerOutput(initial_answer=response.pretty_repr())
+    print(answer)
+    return InitialAnswerOutput(initial_answer=answer)
diff --git a/backend/danswer/agent_search/shared_graph_utils/prompts.py b/backend/danswer/agent_search/shared_graph_utils/prompts.py
@@ -1,12 +1,22 @@
-REWRITE_PROMPT_MULTI = """ \n
-    Please convert an initial user question into a 2-3 more appropriate search queries for retrievel from a
-    document store. \n
+REWRITE_PROMPT_MULTI_ORIGINAL = """ \n
+    Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrievel from a
+    document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
+    enabling the system to search more broadly.
+    Also, try to make the search queries not redundant, i.e. not too similar! \n\n
     Here is the initial question:
     \n ------- \n
     {question}
     \n ------- \n
+    Formulate the queries separated by '--' (Do not say 'Query 1: ...', just write the querytext): """
 
-    Formulate the query: """
+REWRITE_PROMPT_MULTI = """ \n
+    Please create a list of 2-3 sample documents that could answer an original question. Each document
+    should be about as long as the original question. \n
+    Here is the initial question:
+    \n ------- \n
+    {question}
+    \n ------- \n
+    Formulate the sample documents separated by '--' (Do not say 'Document 1: ...', just write the text): """
 
 BASE_RAG_PROMPT = """ \n
     You are an assistant for question-answering tasks. Use the context provided below - and only the
@@ -40,7 +50,7 @@
     Please answer with yes or no:"""
 
 VERIFIER_PROMPT = """ \n
-    Please check whether the document seems to be relevant for the answer of the original question. Please
+    Please check whether the document seems to be relevant for the answer of the question. Please
     only answer with 'yes' or 'no' \n
     Here is the initial question:
     \n ------- \n
@@ -330,7 +340,7 @@
 
 
 INITIAL_DECOMPOSITION_PROMPT = """ \n
-    Please decompose an initial user question into not more than 4 appropriate sub-questions that help to
+    Please decompose an initial user question into 2 or 3 appropriate sub-questions that help to
     answer the original question. The purpose for this decomposition is to isolate individulal entities
     (i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
     for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our

diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
@@ -27,13 +27,13 @@ jira==3.5.1
 jsonref==1.1.0
 trafilatura==1.12.2
 langchain==0.3.7
-langchain-core==0.3.20
+langchain-core==0.3.24
 langchain-openai==0.2.9
 langchain-text-splitters==0.3.2
 langchainhub==0.1.21
 langgraph==0.2.59
 langgraph-checkpoint==2.0.5
-langgraph-sdk==0.1.36
+langgraph-sdk==0.1.44
 litellm==1.53.1
 lxml==5.3.0
 lxml_html_clean==0.2.2
Original file line number	Diff line number	Diff line change
Expand Up		@@ -33,4 +33,4 @@ class ExpandedRetrievalInput(PrimaryState, total=True):


 class ExpandedRetrievalOutput(TypedDict):
-    documents: Annotated[list[InferenceSection], dedup_inference_sections]
+    reordered_documents: Annotated[list[InferenceSection], dedup_inference_sections]