
Commit

deploy: 29e4e87
baskaryan committed Mar 14, 2024
0 parents commit 295acea
Showing 226 changed files with 54,160 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .buildinfo
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 0eab3cfc0be34c97492d7db6e14439cc
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file added .doctrees/environment.pickle
Binary file added .doctrees/index.doctree
Binary file added .doctrees/notebooks/datasets.doctree
Binary file added .doctrees/notebooks/extraction/email.doctree
Binary file added .doctrees/notebooks/extraction/intro.doctree
Binary file added .doctrees/notebooks/getting_started.doctree
Binary file added .doctrees/notebooks/models.doctree
Binary file added .doctrees/notebooks/retrieval/intro.doctree
Binary file added .doctrees/notebooks/run_without_langsmith.doctree
Binary file added .doctrees/notebooks/tool_usage/intro.doctree
Empty file added .nojekyll
112 changes: 112 additions & 0 deletions _sources/index.md
@@ -0,0 +1,112 @@
# 🦜💯 LangChain Benchmarks

[![Release Notes](https://img.shields.io/github/release/langchain-ai/langchain-benchmarks)](https://github.com/langchain-ai/langchain-benchmarks/releases)
[![CI](https://github.com/langchain-ai/langchain-benchmarks/actions/workflows/ci.yml/badge.svg)](https://github.com/langchain-ai/langchain-benchmarks/actions/workflows/ci.yml)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/langchainai.svg?style=social&label=Follow%20%40LangChainAI)](https://twitter.com/langchainai)
[![](https://dcbadge.vercel.app/api/server/6adMQxSpJS?compact=true&style=flat)](https://discord.gg/6adMQxSpJS)
[![Open Issues](https://img.shields.io/github/issues-raw/langchain-ai/langchain-benchmarks)](https://github.com/langchain-ai/langchain-benchmarks/issues)


[📖 Documentation](https://langchain-ai.github.io/langchain-benchmarks/index.html)

A package to help benchmark various LLM-related tasks.

The benchmarks are organized by end-to-end use cases, and
utilize [LangSmith](https://smith.langchain.com/) heavily.

We have several goals in open sourcing this:

- Showing how we collect our benchmark datasets for each task
- Showing which benchmark datasets we use for each task
- Showing how we evaluate each task
- Encouraging others to benchmark their solutions on these tasks (we are always looking for better ways of doing things!)

## Installation

To install the package, run the following command:

```bash
pip install -U langchain-benchmarks
```

All the benchmarks come with an associated benchmark dataset stored in [LangSmith](https://smith.langchain.com). To take advantage of the eval and debugging experience, [sign up](https://smith.langchain.com), and set your API key in your environment:

```bash
export LANGCHAIN_API_KEY=ls-...
```

## Repo Structure

The package is located within [langchain_benchmarks](./langchain_benchmarks/). Check out the [docs](https://langchain-ai.github.io/langchain-benchmarks/index.html) for information on how to get started.

The other directories are legacy and may be moved in the future.
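
For a quick sense of the workflow the docs walk through, here is a minimal sketch, assuming the task registry and `clone_public_dataset` helper exported by the package; the task name below is illustrative, so list the registry to see the exact names available in your version:

```python
from langchain_benchmarks import clone_public_dataset, registry

# Browse the available benchmark tasks.
print(registry)

# Pick a task (name is illustrative; check the registry output for exact names)
# and clone its dataset into your own LangSmith tenant for evaluation.
task = registry["Tool Usage - Relational Data"]
clone_public_dataset(task.dataset_id, dataset_name=task.name)
```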


## Archived

Below are archived benchmarks that require cloning this repo to run.

- [CSV Question Answering](https://github.com/langchain-ai/langchain-benchmarks/tree/main/archived/csv-qa)
- [Extraction](https://github.com/langchain-ai/langchain-benchmarks/tree/main/archived/extraction)
- [Q&A over the LangChain docs](https://github.com/langchain-ai/langchain-benchmarks/tree/main/archived/langchain-docs-benchmarking)
- [Meta-evaluation of 'correctness' evaluators](https://github.com/langchain-ai/langchain-benchmarks/tree/main/archived/meta-evals)


## Related

- For cookbooks on other ways to test, debug, monitor, and improve your LLM applications, check out the [LangSmith docs](https://docs.smith.langchain.com/)
- For information on building with LangChain, check out the [python documentation](https://python.langchain.com/docs/get_started/introduction) or [JS documentation](https://js.langchain.com/docs/get_started/introduction)

```{toctree}
:maxdepth: 2
:caption: Introduction
./notebooks/getting_started
./notebooks/models
./notebooks/datasets
```


```{toctree}
:maxdepth: 0
:caption: Tool Usage
./notebooks/tool_usage/intro
./notebooks/tool_usage/relational_data
./notebooks/tool_usage/multiverse_math
./notebooks/tool_usage/typewriter_1
./notebooks/tool_usage/typewriter_26
./notebooks/tool_usage/benchmark_all_tasks
```

```{toctree}
:maxdepth: 0
:caption: Extraction
./notebooks/extraction/intro
./notebooks/extraction/email
./notebooks/extraction/chat_extraction
./notebooks/extraction/high_cardinality
```

```{toctree}
:maxdepth: 2
:caption: RAG
./notebooks/retrieval/intro
./notebooks/retrieval/langchain_docs_qa
./notebooks/retrieval/semi_structured_benchmarking/semi_structured
./notebooks/retrieval/semi_structured_benchmarking/ss_eval_chunk_sizes
./notebooks/retrieval/semi_structured_benchmarking/ss_eval_long_context
./notebooks/retrieval/semi_structured_benchmarking/ss_eval_multi_vector
./notebooks/retrieval/multi_modal_benchmarking/multi_modal_eval_baseline
./notebooks/retrieval/multi_modal_benchmarking/multi_modal_eval
./notebooks/retrieval/comparing_techniques
```

```{toctree}
:maxdepth: 2
:caption: Benchmarking Without LangSmith
./notebooks/run_without_langsmith
```
226 changes: 226 additions & 0 deletions _sources/notebooks/datasets.ipynb
@@ -0,0 +1,226 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "033684fb-65b2-4586-a959-68c614741ca2",
"metadata": {},
"source": [
"# Datasets\n",
"\n",
"Here, we'll see how to work with LangSmith datasets."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "474292e6",
"metadata": {},
"outputs": [],
"source": [
"%pip install -U langchain-benchmarks"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6d272fbf-710e-4a49-a0da-67e010541905",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain_benchmarks import clone_public_dataset, download_public_dataset"
]
},
{
"cell_type": "markdown",
"id": "18ee0f96-e5c4-4ae9-aebf-7d8b88c51662",
"metadata": {},
"source": [
"Let's first download the dataset to the local file system"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "58b94f6d-0c91-4361-9b22-f758ffaa150a",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching examples...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5a2fad8c0c3549ec96a3b38fe8a002b0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/21 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done fetching examples.\n"
]
}
],
"source": [
"download_public_dataset(\n",
" \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "841db832-b0d3-4fd1-8531-1154ec9b3caa",
"metadata": {},
"source": [
"we can take a look at the first two examples"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "664e90fc-af84-4c5f-a3dd-5d9ffe649650",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" {\n",
" \"created_at\": \"2023-11-15T15:26:53.511629\",\n",
" \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
" \"id\": \"0703a989-2693-4039-a1f6-7281fc1b4cb0\",\n",
" \"inputs\": {\n",
" \"question\": \"do bob and alice live in the same city?\"\n",
" },\n",
" \"modified_at\": \"2023-11-15T15:26:53.511629\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"find_users_by_name\",\n",
" \"get_user_location\",\n",
" \"get_city_for_location\",\n",
" \"get_user_location\",\n",
" \"get_city_for_location\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"no\"\n",
" },\n",
" \"runs\": []\n",
" },\n",
" {\n",
" \"created_at\": \"2023-11-15T15:26:53.491359\",\n",
" \"dataset_id\": \"9f73165c-d333-4d14-8f59-bd7eede5db08\",\n",
" \"id\": \"b258b95a-9524-4da7-b758-c5481109322d\",\n",
" \"inputs\": {\n",
" \"question\": \"Is it likely that Donna is outside with an umbrella at this time?\"\n",
" },\n",
" \"modified_at\": \"2023-11-15T15:26:53.491359\",\n",
" \"outputs\": {\n",
" \"expected_steps\": [\n",
" \"find_users_by_name\",\n",
" \"get_user_location\",\n",
" \"get_current_time_for_location\",\n",
" \"get_current_weather_for_location\"\n",
" ],\n",
" \"order_matters\": false,\n",
" \"reference\": \"yes\"\n",
" },\n",
" \"runs\": []\n",
" }\n",
"]\n"
]
}
],
"source": [
"import json\n",
"\n",
"with open(\"./e95d45da-aaa3-44b3-ba2b-7c15ff6e46f5.json\", \"r\", encoding=\"utf-8\") as f:\n",
" print(json.dumps(json.load(f)[:2], indent=2, sort_keys=True))"
]
},
{
"cell_type": "markdown",
"id": "2c6cf01f-466b-406d-b4c7-2395747780fd",
"metadata": {},
"source": [
"We can also clone the dataset to our local tenant"
]
},
{
"cell_type": "markdown",
"id": "e4dea4df-2f1c-436b-a71c-49ffb2295ccc",
"metadata": {},
"source": [
"Executing this command will clone the dataset to your own LangSmith tenant. \n",
"For this to work you must have a [LangSmith account](https://smith.langchain.com/) set up."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7eb38ea6",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Get from https://smith.langchain.com/settings\n",
"os.environ[\"LANGCHAIN_API_KEY\"] = \"ls_...\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18d0b905-2a6a-4752-a7cb-8653bd9049e3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"clone_public_dataset(\n",
" \"https://smith.langchain.com/public/452ccafc-18e1-4314-885b-edd735f17b9d/examples\",\n",
" dataset_name=\"Agent Dataset\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}