From 9894475bb66f9de8ab777dbc052c50386df325d4 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 16 Nov 2023 09:19:54 -0500 Subject: [PATCH] x --- Makefile | 2 +- csv-qa/custom_agent.py | 60 +++++++++++-------- csv-qa/pandas_agent_gpt_35.py | 13 ++-- csv-qa/pandas_agent_gpt_4.py | 14 ++--- csv-qa/pandas_agent_instruct.py | 51 +++++++++------- csv-qa/pandas_ai.py | 45 +++++++------- csv-qa/streamlit_app.py | 49 ++++++++------- csv-qa/upload_data.py | 2 +- extraction/streamlit_app.py | 43 ++++++------- langchain-docs-benchmarking/app/server.py | 5 +- .../anthropic_iterative_search/retriever.py | 2 +- .../chat-langchain/chat_langchain/chain.py | 3 +- .../langchain_docs_retriever/download_db.py | 3 +- .../langchain_docs_retriever/retriever.py | 5 +- .../oai-assistant/oai_assistant/chain.py | 15 +++-- langchain-docs-benchmarking/run_evals.py | 20 +++---- .../run_experiments.py | 5 +- .../correctness/test_correctness_evaluator.py | 3 +- 18 files changed, 181 insertions(+), 159 deletions(-) diff --git a/Makefile b/Makefile index a55d89af..1c8badea 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ test_watch: ###################### # Define a variable for Python and notebook files. -lint format: PYTHON_FILES=langchain_benchmarks tests +lint format: PYTHON_FILES=. lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=. --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') lint lint_diff: diff --git a/csv-qa/custom_agent.py b/csv-qa/custom_agent.py index 8f47c0be..52976ed8 100644 --- a/csv-qa/custom_agent.py +++ b/csv-qa/custom_agent.py @@ -1,22 +1,25 @@ -from langchain.agents import OpenAIFunctionsAgent, AgentExecutor -from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder -from langchain.tools import PythonAstREPLTool import pandas as pd +from langchain.agents import AgentExecutor, OpenAIFunctionsAgent +from langchain.agents.agent_toolkits.conversational_retrieval.tool import ( + create_retriever_tool, +) from langchain.chat_models import ChatOpenAI -from langsmith import Client -from langchain.smith import RunEvalConfig, run_on_dataset -from pydantic import BaseModel, Field from langchain.embeddings import OpenAIEmbeddings +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder +from langchain.smith import RunEvalConfig, run_on_dataset +from langchain.tools import PythonAstREPLTool from langchain.vectorstores import FAISS -from langchain.agents.agent_toolkits.conversational_retrieval.tool import create_retriever_tool - +from langsmith import Client +from pydantic import BaseModel, Field -pd.set_option('display.max_rows', 20) -pd.set_option('display.max_columns', 20) +pd.set_option("display.max_rows", 20) +pd.set_option("display.max_columns", 20) embedding_model = OpenAIEmbeddings() vectorstore = FAISS.load_local("titanic_data", embedding_model) -retriever_tool = create_retriever_tool(vectorstore.as_retriever(), "person_name_search", "Search for a person by name") +retriever_tool = create_retriever_tool( + vectorstore.as_retriever(), "person_name_search", "Search for a person by name" +) TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`. @@ -42,7 +45,6 @@ """ - class PythonInputs(BaseModel): query: str = Field(description="code snippet to run") @@ -51,27 +53,33 @@ class PythonInputs(BaseModel): df = pd.read_csv("titanic.csv") template = TEMPLATE.format(dhead=df.head().to_markdown()) - prompt = ChatPromptTemplate.from_messages([ - ("system", template), - MessagesPlaceholder(variable_name="agent_scratchpad"), - ("human", "{input}") - ]) + prompt = ChatPromptTemplate.from_messages( + [ + ("system", template), + MessagesPlaceholder(variable_name="agent_scratchpad"), + ("human", "{input}"), + ] + ) def get_chain(): - repl = PythonAstREPLTool(locals={"df": df}, name="python_repl", - description="Runs code and returns the output of the final line", - args_schema=PythonInputs) + repl = PythonAstREPLTool( + locals={"df": df}, + name="python_repl", + description="Runs code and returns the output of the final line", + args_schema=PythonInputs, + ) tools = [repl, retriever_tool] - agent = OpenAIFunctionsAgent(llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools) - agent_executor = AgentExecutor(agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate") + agent = OpenAIFunctionsAgent( + llm=ChatOpenAI(temperature=0, model="gpt-4"), prompt=prompt, tools=tools + ) + agent_executor = AgentExecutor( + agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate" + ) return agent_executor - client = Client() eval_config = RunEvalConfig( - evaluators=[ - "qa" - ], + evaluators=["qa"], ) chain_results = run_on_dataset( client, diff --git a/csv-qa/pandas_agent_gpt_35.py b/csv-qa/pandas_agent_gpt_35.py index 6db228b0..57410d82 100644 --- a/csv-qa/pandas_agent_gpt_35.py +++ b/csv-qa/pandas_agent_gpt_35.py @@ -1,9 +1,9 @@ import pandas as pd -from langchain.chat_models import ChatOpenAI from langchain.agents.agent_toolkits import create_pandas_dataframe_agent from langchain.agents.agent_types import AgentType -from langsmith import Client +from langchain.chat_models import ChatOpenAI from langchain.smith import RunEvalConfig, run_on_dataset +from langsmith import Client if __name__ == "__main__": df = pd.read_csv("titanic.csv") @@ -18,20 +18,17 @@ def get_chain(): df, agent_type=AgentType.OPENAI_FUNCTIONS, agent_executor_kwargs=agent_executor_kwargs, - max_iterations=5 + max_iterations=5, ) return agent - client = Client() eval_config = RunEvalConfig( - evaluators=[ - "qa" - ], + evaluators=["qa"], ) chain_results = run_on_dataset( client, dataset_name="Titanic CSV Data", llm_or_chain_factory=get_chain, evaluation=eval_config, - ) \ No newline at end of file + ) diff --git a/csv-qa/pandas_agent_gpt_4.py b/csv-qa/pandas_agent_gpt_4.py index 1e03a702..c72ae6bc 100644 --- a/csv-qa/pandas_agent_gpt_4.py +++ b/csv-qa/pandas_agent_gpt_4.py @@ -1,14 +1,13 @@ import pandas as pd -from langchain.chat_models import ChatOpenAI from langchain.agents.agent_toolkits import create_pandas_dataframe_agent from langchain.agents.agent_types import AgentType -from langsmith import Client +from langchain.chat_models import ChatOpenAI from langchain.smith import RunEvalConfig, run_on_dataset +from langsmith import Client if __name__ == "__main__": df = pd.read_csv("titanic.csv") - def get_chain(): llm = ChatOpenAI(temperature=0, model="gpt-4") agent_executor_kwargs = { @@ -19,20 +18,17 @@ def get_chain(): df, agent_type=AgentType.OPENAI_FUNCTIONS, agent_executor_kwargs=agent_executor_kwargs, - max_iterations=5 + max_iterations=5, ) return agent - client = Client() eval_config = RunEvalConfig( - evaluators=[ - "qa" - ], + evaluators=["qa"], ) chain_results = run_on_dataset( client, dataset_name="Titanic CSV Data", llm_or_chain_factory=get_chain, evaluation=eval_config, - ) \ No newline at end of file + ) diff --git a/csv-qa/pandas_agent_instruct.py b/csv-qa/pandas_agent_instruct.py index 307603a2..64d1d1e3 100644 --- a/csv-qa/pandas_agent_instruct.py +++ b/csv-qa/pandas_agent_instruct.py @@ -1,22 +1,25 @@ -from langchain.agents import ZeroShotAgent, AgentExecutor -from langchain.prompts import PromptTemplate -from langchain.tools import PythonAstREPLTool import pandas as pd +from langchain.agents import AgentExecutor, ZeroShotAgent +from langchain.agents.agent_toolkits.conversational_retrieval.tool import ( + create_retriever_tool, +) +from langchain.embeddings import OpenAIEmbeddings from langchain.llms import OpenAI -from langsmith import Client +from langchain.prompts import PromptTemplate from langchain.smith import RunEvalConfig, run_on_dataset -from pydantic import BaseModel, Field -from langchain.embeddings import OpenAIEmbeddings +from langchain.tools import PythonAstREPLTool from langchain.vectorstores import FAISS -from langchain.agents.agent_toolkits.conversational_retrieval.tool import create_retriever_tool - +from langsmith import Client +from pydantic import BaseModel, Field -pd.set_option('display.max_rows', 20) -pd.set_option('display.max_columns', 20) +pd.set_option("display.max_rows", 20) +pd.set_option("display.max_columns", 20) embedding_model = OpenAIEmbeddings() vectorstore = FAISS.load_local("titanic_data", embedding_model) -retriever_tool = create_retriever_tool(vectorstore.as_retriever(), "person_name_search", "Search for a person by name") +retriever_tool = create_retriever_tool( + vectorstore.as_retriever(), "person_name_search", "Search for a person by name" +) TEMPLATE = """You are working with a pandas dataframe in Python. The name of the dataframe is `df`. @@ -41,7 +44,6 @@ Use `python_repl` since even though the question is about a person, you don't know their name so you can't include it.""" - class PythonInputs(BaseModel): query: str = Field(description="code snippet to run") @@ -50,22 +52,27 @@ class PythonInputs(BaseModel): df = pd.read_csv("titanic.csv") template = TEMPLATE.format(dhead=df.head().to_markdown()) - def get_chain(): - repl = PythonAstREPLTool(locals={"df": df}, name="python_repl", - description="Runs code and returns the output of the final line", - args_schema=PythonInputs) + repl = PythonAstREPLTool( + locals={"df": df}, + name="python_repl", + description="Runs code and returns the output of the final line", + args_schema=PythonInputs, + ) tools = [repl, retriever_tool] - agent = ZeroShotAgent.from_llm_and_tools(llm=OpenAI(temperature=0, model="gpt-3.5-turbo-instruct"), tools=tools, prefix=template) - agent_executor = AgentExecutor(agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate") + agent = ZeroShotAgent.from_llm_and_tools( + llm=OpenAI(temperature=0, model="gpt-3.5-turbo-instruct"), + tools=tools, + prefix=template, + ) + agent_executor = AgentExecutor( + agent=agent, tools=tools, max_iterations=5, early_stopping_method="generate" + ) return agent_executor - client = Client() eval_config = RunEvalConfig( - evaluators=[ - "qa" - ], + evaluators=["qa"], ) chain_results = run_on_dataset( client, diff --git a/csv-qa/pandas_ai.py b/csv-qa/pandas_ai.py index ceab68eb..e828e630 100644 --- a/csv-qa/pandas_ai.py +++ b/csv-qa/pandas_ai.py @@ -1,44 +1,47 @@ import pandas as pd -from langchain.chat_models import ChatOpenAI from langchain.agents.agent_toolkits import create_pandas_dataframe_agent from langchain.agents.agent_types import AgentType -from langsmith import Client -from langchain.smith import RunEvalConfig, run_on_dataset -import pandas as pd -from pandasai import PandasAI - from langchain.chat_models import ChatOpenAI from langchain.prompts import ChatPromptTemplate from langchain.schema.output_parser import StrOutputParser +from langchain.smith import RunEvalConfig, run_on_dataset +from langsmith import Client +from pandasai import PandasAI if __name__ == "__main__": df = pd.read_csv("titanic.csv") pandas_ai = PandasAI(ChatOpenAI(temperature=0, model="gpt-4"), enable_cache=False) - prompt = ChatPromptTemplate.from_messages([ - ("system", - "Answer the users question about some data. A data scientist will run some code and the results will be returned to you to use in your answer"), - ("human", "Question: {input}"), - ("human", "Data Scientist Result: {result}"), - ]) + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "Answer the users question about some data. A data scientist will run some code and the results will be returned to you to use in your answer", + ), + ("human", "Question: {input}"), + ("human", "Data Scientist Result: {result}"), + ] + ) def get_chain(): - chain = { - "input": lambda x: x["input_question"], - "result": lambda x: pandas_ai(df, prompt=x['input_question']) - } | prompt | ChatOpenAI(temperature=0, model="gpt-4") | StrOutputParser() + chain = ( + { + "input": lambda x: x["input_question"], + "result": lambda x: pandas_ai(df, prompt=x["input_question"]), + } + | prompt + | ChatOpenAI(temperature=0, model="gpt-4") + | StrOutputParser() + ) return chain - client = Client() eval_config = RunEvalConfig( - evaluators=[ - "qa" - ], + evaluators=["qa"], ) chain_results = run_on_dataset( client, dataset_name="Titanic CSV Data", llm_or_chain_factory=get_chain, evaluation=eval_config, - ) \ No newline at end of file + ) diff --git a/csv-qa/streamlit_app.py b/csv-qa/streamlit_app.py index a94d3f5d..fa7b4d57 100644 --- a/csv-qa/streamlit_app.py +++ b/csv-qa/streamlit_app.py @@ -1,10 +1,10 @@ import pandas as pd import streamlit as st -from langchain.chat_models import ChatOpenAI from langchain.agents.agent_toolkits import create_pandas_dataframe_agent from langchain.agents.agent_types import AgentType +from langchain.chat_models import ChatOpenAI -df = pd.read_csv('titanic.csv') +df = pd.read_csv("titanic.csv") llm = ChatOpenAI(temperature=0) @@ -12,31 +12,36 @@ from langsmith import Client + client = Client() + + def send_feedback(run_id, score): client.create_feedback(run_id, "user_score", score=score) -st.set_page_config(page_title='πŸ¦œπŸ”— Ask the CSV App') -st.title('πŸ¦œπŸ”— Ask the CSV App') -st.info("Most 'question answering' applications run over unstructured text data. But a lot of the data in the world is tabular data! This is an attempt to create an application using [LangChain](https://github.com/langchain-ai/langchain) to let you ask questions of data in tabular format. For this demo application, we will use the Titanic Dataset. Please explore it [here](https://github.com/datasciencedojo/datasets/blob/master/titanic.csv) to get a sense for what questions you can ask. Please leave feedback on well the question is answered, and we will use that improve the application!") -query_text = st.text_input('Enter your question:', placeholder = 'Who was in cabin C128?') +st.set_page_config(page_title="πŸ¦œπŸ”— Ask the CSV App") +st.title("πŸ¦œπŸ”— Ask the CSV App") +st.info( + "Most 'question answering' applications run over unstructured text data. But a lot of the data in the world is tabular data! This is an attempt to create an application using [LangChain](https://github.com/langchain-ai/langchain) to let you ask questions of data in tabular format. For this demo application, we will use the Titanic Dataset. Please explore it [here](https://github.com/datasciencedojo/datasets/blob/master/titanic.csv) to get a sense for what questions you can ask. Please leave feedback on well the question is answered, and we will use that improve the application!" +) + +query_text = st.text_input("Enter your question:", placeholder="Who was in cabin C128?") # Form input and query result = None -with st.form('myform', clear_on_submit=True): - submitted = st.form_submit_button('Submit') - if submitted: - with st.spinner('Calculating...'): - response = agent({"input": query_text}, include_run_info=True) - result = response["output"] - run_id = response["__run"].run_id +with st.form("myform", clear_on_submit=True): + submitted = st.form_submit_button("Submit") + if submitted: + with st.spinner("Calculating..."): + response = agent({"input": query_text}, include_run_info=True) + result = response["output"] + run_id = response["__run"].run_id if result is not None: - st.info(result) - col_blank, col_text, col1, col2 = st.columns([10, 2,1,1]) - with col_text: - st.text("Feedback:") - with col1: - st.button("πŸ‘", on_click=send_feedback, args=(run_id, 1)) - with col2: - st.button("πŸ‘Ž", on_click=send_feedback, args=(run_id, 0)) - + st.info(result) + col_blank, col_text, col1, col2 = st.columns([10, 2, 1, 1]) + with col_text: + st.text("Feedback:") + with col1: + st.button("πŸ‘", on_click=send_feedback, args=(run_id, 1)) + with col2: + st.button("πŸ‘Ž", on_click=send_feedback, args=(run_id, 0)) diff --git a/csv-qa/upload_data.py b/csv-qa/upload_data.py index 26d9168c..365ed019 100644 --- a/csv-qa/upload_data.py +++ b/csv-qa/upload_data.py @@ -8,5 +8,5 @@ output_keys=["output_text"], name="Titanic CSV Data", description="QA over titanic data", - data_type = "kv" + data_type="kv", ) diff --git a/extraction/streamlit_app.py b/extraction/streamlit_app.py index cf1c3879..fb7eafc7 100644 --- a/extraction/streamlit_app.py +++ b/extraction/streamlit_app.py @@ -1,18 +1,23 @@ import streamlit as st -from langsmith import Client -from langchain.chat_models import ChatOpenAI from langchain.chains import create_extraction_chain +from langchain.chat_models import ChatOpenAI +from langsmith import Client -st.set_page_config(page_title='πŸ¦œπŸ”— Text-to-graph extraction') +st.set_page_config(page_title="πŸ¦œπŸ”— Text-to-graph extraction") client = Client() + + def send_feedback(run_id, score): client.create_feedback(run_id, "user_score", score=score) -st.title('πŸ¦œπŸ”— Text-to-graph playground') -st.info("This playground explores the use of [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates) and [LangChain](https://github.com/langchain-ai/langchain) to build knowledge graphs from user-input text. It breaks down the user input text into knowledge graph triples of subject (primary entities or concepts in a sentence), predicate (actions or relationships that connect subjects to objects), and object (entities or concepts that interact with or are acted upon by the subjects).") + +st.title("πŸ¦œπŸ”— Text-to-graph playground") +st.info( + "This playground explores the use of [OpenAI functions](https://openai.com/blog/function-calling-and-other-api-updates) and [LangChain](https://github.com/langchain-ai/langchain) to build knowledge graphs from user-input text. It breaks down the user input text into knowledge graph triples of subject (primary entities or concepts in a sentence), predicate (actions or relationships that connect subjects to objects), and object (entities or concepts that interact with or are acted upon by the subjects)." +) # Input text (optional default) -oppenheimer_text=''''Julius Robert Oppenheimer, often known as Robert or "Oppie", is heralded as the father of the atomic bomb. Emerging from a non-practicing Jewish family in New York, he made several breakthroughs, such as the early black hole theory, before the monumental Manhattan Project. His wife, Katherine β€œKitty” Oppenheimer, was a German-born woman with a complex past, including connections to the Communist Party. Oppenheimer\'s journey was beset by political adversaries, notably Lewis Strauss, chairman of the U.S. Atomic Energy Commission, and William Borden, an executive director with hawkish nuclear ambitions. These tensions culminated in the famous 1954 security hearing. Influential figures like lieutenant general Leslie Groves, who had also overseen the Pentagon\'s creation, stood by Oppenheimer\'s side, having earlier chosen him for the Manhattan Project and the Los Alamos location. Intimate relationships, like that with Jean Tatlock, a Communist and the possible muse behind the Trinity test\'s name, and colleagues like Frank, Oppenheimer\'s physicist brother, intertwined with his professional life. Scientists such as Ernest Lawrence, Edward Teller, David Hill, Richard Feynman, and Hans Bethe were some of Oppenheimer\'s contemporaries, each contributing to and contesting the atomic age\'s directions. Boris Pash\'s investigations, and the perspectives of figures like Leo Szilard, Niels Bohr, Harry Truman, and others, framed the broader sociopolitical context. Meanwhile, individuals like Robert Serber, Enrico Fermi, Albert Einstein, and Isidor Isaac Rabi, among many others, each played their parts in this narrative, from naming the atomic bombs to pivotal scientific contributions and advisory roles. All these figures, together with the backdrop of World War II, McCarthyism, and the dawn of the nuclear age, presented a complex mosaic of ambitions, loyalties, betrayals, and ideologies.oppenheimer_short.txt''' +oppenheimer_text = """'Julius Robert Oppenheimer, often known as Robert or "Oppie", is heralded as the father of the atomic bomb. Emerging from a non-practicing Jewish family in New York, he made several breakthroughs, such as the early black hole theory, before the monumental Manhattan Project. His wife, Katherine β€œKitty” Oppenheimer, was a German-born woman with a complex past, including connections to the Communist Party. Oppenheimer\'s journey was beset by political adversaries, notably Lewis Strauss, chairman of the U.S. Atomic Energy Commission, and William Borden, an executive director with hawkish nuclear ambitions. These tensions culminated in the famous 1954 security hearing. Influential figures like lieutenant general Leslie Groves, who had also overseen the Pentagon\'s creation, stood by Oppenheimer\'s side, having earlier chosen him for the Manhattan Project and the Los Alamos location. Intimate relationships, like that with Jean Tatlock, a Communist and the possible muse behind the Trinity test\'s name, and colleagues like Frank, Oppenheimer\'s physicist brother, intertwined with his professional life. Scientists such as Ernest Lawrence, Edward Teller, David Hill, Richard Feynman, and Hans Bethe were some of Oppenheimer\'s contemporaries, each contributing to and contesting the atomic age\'s directions. Boris Pash\'s investigations, and the perspectives of figures like Leo Szilard, Niels Bohr, Harry Truman, and others, framed the broader sociopolitical context. Meanwhile, individuals like Robert Serber, Enrico Fermi, Albert Einstein, and Isidor Isaac Rabi, among many others, each played their parts in this narrative, from naming the atomic bombs to pivotal scientific contributions and advisory roles. All these figures, together with the backdrop of World War II, McCarthyism, and the dawn of the nuclear age, presented a complex mosaic of ambitions, loyalties, betrayals, and ideologies.oppenheimer_short.txt""" # Knowledge triplet schema default_schema = { @@ -31,46 +36,44 @@ def send_feedback(run_id, score): st.warning(f"Text is too long. Processing only the first {MAX_CHARS} characters") user_input_text = user_input_text[:MAX_CHARS] + # Output formatting of triples def json_to_markdown_table(json_list): if not json_list: return "No data available." - + # Extract headers headers = json_list[0].keys() markdown_table = " | ".join(headers) + "\n" markdown_table += " | ".join(["---"] * len(headers)) + "\n" - + # Extract rows for item in json_list: row = " | ".join([str(item[header]) for header in headers]) markdown_table += row + "\n" - + return markdown_table + # Form input and query markdown_output = None -with st.form('myform', clear_on_submit=True): - - submitted = st.form_submit_button('Submit') +with st.form("myform", clear_on_submit=True): + submitted = st.form_submit_button("Submit") if submitted: - - with st.spinner('Calculating...'): - + with st.spinner("Calculating..."): llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo") chain = create_extraction_chain(default_schema, llm) - extraction_output = chain(user_input_text,include_run_info=True) - markdown_output = json_to_markdown_table(extraction_output['text']) + extraction_output = chain(user_input_text, include_run_info=True) + markdown_output = json_to_markdown_table(extraction_output["text"]) run_id = extraction_output["__run"].run_id - + # Feeback if markdown_output is not None: st.markdown(markdown_output) - col_blank, col_text, col1, col2 = st.columns([10, 2,1,1]) + col_blank, col_text, col1, col2 = st.columns([10, 2, 1, 1]) with col_text: st.text("Feedback:") with col1: st.button("πŸ‘", on_click=send_feedback, args=(run_id, 1)) with col2: st.button("πŸ‘Ž", on_click=send_feedback, args=(run_id, 0)) - diff --git a/langchain-docs-benchmarking/app/server.py b/langchain-docs-benchmarking/app/server.py index f0764e54..49b93469 100644 --- a/langchain-docs-benchmarking/app/server.py +++ b/langchain-docs-benchmarking/app/server.py @@ -1,8 +1,8 @@ -from fastapi import FastAPI -from langserve import add_routes from chat_langchain.chain import chain +from fastapi import FastAPI from openai_functions_agent import agent_executor as openai_functions_agent_chain +from langserve import add_routes app = FastAPI() @@ -16,6 +16,7 @@ add_routes(app, openai_functions_agent_chain, path="/openai-functions-agent") + def run_server(port: int = 1983): import uvicorn diff --git a/langchain-docs-benchmarking/packages/anthropic-iterative-search/anthropic_iterative_search/retriever.py b/langchain-docs-benchmarking/packages/anthropic-iterative-search/anthropic_iterative_search/retriever.py index 73209067..6fe1830a 100644 --- a/langchain-docs-benchmarking/packages/anthropic-iterative-search/anthropic_iterative_search/retriever.py +++ b/langchain-docs-benchmarking/packages/anthropic-iterative-search/anthropic_iterative_search/retriever.py @@ -12,6 +12,6 @@ @tool -def search(query, callbacks = None): +def search(query, callbacks=None): """Search the LangChain docs with the retriever.""" return retriever.get_relevant_documents(query, callbacks=callbacks) diff --git a/langchain-docs-benchmarking/packages/chat-langchain/chat_langchain/chain.py b/langchain-docs-benchmarking/packages/chat-langchain/chat_langchain/chain.py index 4bda4912..a56a93ec 100644 --- a/langchain-docs-benchmarking/packages/chat-langchain/chat_langchain/chain.py +++ b/langchain-docs-benchmarking/packages/chat-langchain/chat_langchain/chain.py @@ -2,7 +2,7 @@ from operator import itemgetter from typing import Dict, List, Optional, Sequence -from langchain.chat_models import ChatAnthropic, ChatOpenAI, ChatFireworks +from langchain.chat_models import ChatAnthropic, ChatFireworks, ChatOpenAI from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate from langchain.schema import Document from langchain.schema.language_model import BaseLanguageModel @@ -18,7 +18,6 @@ from langchain_docs_retriever.retriever import get_retriever from pydantic import BaseModel - RESPONSE_TEMPLATE = """\ You are an expert programmer and problem-solver, tasked with answering any question \ about Langchain. diff --git a/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/download_db.py b/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/download_db.py index 21ef337e..0c9669c6 100644 --- a/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/download_db.py +++ b/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/download_db.py @@ -1,7 +1,8 @@ import os -import requests import zipfile +import requests + remote_url = "https://storage.googleapis.com/benchmarks-artifacts/langchain-docs-benchmarking/chroma_db.zip" directory = os.path.dirname(os.path.realpath(__file__)) db_directory = os.path.join(directory, "db") diff --git a/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/retriever.py b/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/retriever.py index 954279a5..9c6f21a6 100644 --- a/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/retriever.py +++ b/langchain-docs-benchmarking/packages/langchain-docs-retriever/langchain_docs_retriever/retriever.py @@ -2,12 +2,13 @@ from typing import Optional from langchain.embeddings import OpenAIEmbeddings + +# from langchain_docs_retriever.voyage import VoyageEmbeddings +from langchain.embeddings.voyageai import VoyageEmbeddings from langchain.schema.embeddings import Embeddings from langchain.schema.retriever import BaseRetriever from langchain.vectorstores.chroma import Chroma -# from langchain_docs_retriever.voyage import VoyageEmbeddings -from langchain.embeddings.voyageai import VoyageEmbeddings from .download_db import fetch_langchain_docs_db WEAVIATE_DOCS_INDEX_NAME = "LangChain_agent_docs" diff --git a/langchain-docs-benchmarking/packages/oai-assistant/oai_assistant/chain.py b/langchain-docs-benchmarking/packages/oai-assistant/oai_assistant/chain.py index 620ce243..c03f4afd 100644 --- a/langchain-docs-benchmarking/packages/oai-assistant/oai_assistant/chain.py +++ b/langchain-docs-benchmarking/packages/oai-assistant/oai_assistant/chain.py @@ -1,8 +1,9 @@ -from langchain_experimental.openai_assistant import OpenAIAssistantRunnable +import json + from langchain.agents import AgentExecutor -from langchain_docs_retriever.retriever import get_retriever from langchain.tools import tool -import json +from langchain_docs_retriever.retriever import get_retriever +from langchain_experimental.openai_assistant import OpenAIAssistantRunnable # This is used to tell the model how to best use the retriever. @@ -28,6 +29,8 @@ def search(query, callbacks=None) -> str: ) -agent_executor = (lambda x: {"content": x["question"]}) | AgentExecutor( - agent=agent, tools=tools -) | (lambda x: x["output"]) +agent_executor = ( + (lambda x: {"content": x["question"]}) + | AgentExecutor(agent=agent, tools=tools) + | (lambda x: x["output"]) +) diff --git a/langchain-docs-benchmarking/run_evals.py b/langchain-docs-benchmarking/run_evals.py index 45917705..6739dcb1 100644 --- a/langchain-docs-benchmarking/run_evals.py +++ b/langchain-docs-benchmarking/run_evals.py @@ -1,21 +1,19 @@ import argparse +import importlib.util +import os +import sys +import uuid from functools import partial -from typing import Optional, Callable +from typing import Callable, Optional +from anthropic_iterative_search.chain import chain as anthropic_agent_chain +from chat_langchain.chain import create_chain from langchain.chat_models import ChatOpenAI -from langchain.smith import RunEvalConfig, run_on_dataset from langchain.schema.runnable import Runnable +from langchain.smith import RunEvalConfig, run_on_dataset from langsmith import Client -from chat_langchain.chain import create_chain -from anthropic_iterative_search.chain import chain as anthropic_agent_chain -from openai_functions_agent import agent_executor as openai_functions_agent_chain from oai_assistant.chain import agent_executor as openai_assistant_chain -import os -import importlib.util -import sys - - -import uuid +from openai_functions_agent import agent_executor as openai_functions_agent_chain ls_client = Client() diff --git a/langchain-docs-benchmarking/run_experiments.py b/langchain-docs-benchmarking/run_experiments.py index 7b2238a0..51569e04 100644 --- a/langchain-docs-benchmarking/run_experiments.py +++ b/langchain-docs-benchmarking/run_experiments.py @@ -1,8 +1,9 @@ import argparse -from run_evals import main -from prepare_dataset import create_langchain_docs_dataset import json +from prepare_dataset import create_langchain_docs_dataset +from run_evals import main + experiments = [ { # "server_url": "http://localhost:1983/openai-functions-agent", diff --git a/meta-evals/correctness/test_correctness_evaluator.py b/meta-evals/correctness/test_correctness_evaluator.py index bcb21288..4388d063 100644 --- a/meta-evals/correctness/test_correctness_evaluator.py +++ b/meta-evals/correctness/test_correctness_evaluator.py @@ -3,8 +3,7 @@ from uuid import uuid4 import pytest -from langchain import hub -from langchain import chat_models, llms +from langchain import chat_models, hub, llms from langchain.evaluation import load_evaluator from langchain.schema import runnable from langchain.smith import RunEvalConfig, run_on_dataset