diff --git a/notebooks/Analysis_of_Form_10_K_Using_Cohere_and_RAG.ipynb b/notebooks/Analysis_of_Form_10_K_Using_Cohere_and_RAG.ipynb
new file mode 100644
index 00000000..78066514
--- /dev/null
+++ b/notebooks/Analysis_of_Form_10_K_Using_Cohere_and_RAG.ipynb
@@ -0,0 +1,2845 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UzG0FP8fwmIZ"
+ },
+ "source": [
+ "# **Analysis of Form 10-K/10-Q Using Cohere and RAG**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NXzl9w_9Zf5K"
+ },
+ "source": [
+ "## **Getting Started**\n",
+ "\n",
+ "You may use this script to jumpstart financial analysis of 10-Ks or 10-Qs with Cohere's Command model.\n",
+ "\n",
+ "This cookbook relies on helpful tooling from LlamaIndex, as well as our Cohere SDK. If you're familiar with LlamaIndex, it should be easy to slot this process into your own productivity flows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "eYFvTs4mVpU4"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "# System packages used by the OCR appendix; `-y` keeps apt from waiting on a confirmation prompt\n",
+ "!sudo apt install -y tesseract-ocr poppler-utils\n",
+ "# `%pip` installs into the environment of the running kernel\n",
+ "%pip install cohere langchain llama-index llama-index-embeddings-cohere llama-index-postprocessor-cohere-rerank pytesseract pdf2image"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "LYmEOVKCAuk7"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "# `%%capture` is a cell magic, so it has to be the very first line of the cell\n",
+ "# Due to compatibility issues, we need to do imports like this\n",
+ "# NOTE(review): the import is kept ahead of the install, as before - confirm whether that order matters\n",
+ "from llama_index.core.schema import TextNode\n",
+ "\n",
+ "%pip install unstructured"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "Lc8CGMajDV9b",
+ "outputId": "5efe06b3-7eb5-490c-e9d3-b95b926358ed"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Enter your Cohere API key: ··········\n"
+ ]
+ }
+ ],
+ "source": [
+ "import cohere\n",
+ "from getpass import getpass\n",
+ "\n",
+ "# Prompt for the API key at runtime so it is never hard-coded in the notebook\n",
+ "COHERE_API_KEY = getpass(\"Enter your Cohere API key: \")\n",
+ "\n",
+ "# Instantiate a client to communicate with Cohere's API using our Python SDK\n",
+ "co = cohere.Client(COHERE_API_KEY)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "FfCtNkl6Z-eP"
+ },
+ "source": [
+ "## **Step 1: Loading a 10-K**\n",
+ "\n",
+ "You may run the following cells to load a 10-K directly from SEC EDGAR. The filing is fetched as HTML and parsed into plain text, so no OCR step is needed here.\n",
+ "\n",
+ "> 💡 If you'd like to run the OCR pipeline yourself, you can find more info in the section titled **PDF to Text using OCR and `pdf2image`**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 71
+ },
+ "id": "6gkZ67Eh7l1A",
+ "outputId": "84406883-d4d4-44b5-c071-837410ee0d5a"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
+ "[nltk_data] /root/nltk_data...\n",
+ "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Using langchain here since they have access to the Unstructured Data Loader powered by unstructured.io\n",
+ "from langchain_community.document_loaders import UnstructuredURLLoader\n",
+ "\n",
+ "# Load up Airbnb's 10-K from this past fiscal year (filed in 2024)\n",
+ "# Feel free to fill in some other EDGAR path\n",
+ "url = \"https://www.sec.gov/Archives/edgar/data/1559720/000155972024000006/abnb-20231231.htm\"\n",
+ "loader = UnstructuredURLLoader(urls=[url], headers={\"User-Agent\": \"cohere cohere@cohere.com\"})\n",
+ "documents = loader.load()\n",
+ "\n",
+ "# Guard against an empty result so the failure is explicit rather than an IndexError on documents[0]\n",
+ "if not documents:\n",
+ "    raise RuntimeError(f\"No content could be loaded from {url}\")\n",
+ "\n",
+ "# Full text of the filing, reused later for the full-context comparison\n",
+ "edgar_10k = documents[0].page_content\n",
+ "\n",
+ "# Load the document(s) as simple text nodes, to be passed to the tokenization processor\n",
+ "nodes = [TextNode(text=document.page_content, id_=f\"doc_{i}\") for i, document in enumerate(documents)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "W2PStuqwPPUM"
+ },
+ "source": [
+ "We'll need to convert the text into chunks of a certain size in order for the Cohere embedding model to properly ingest them down the line.\n",
+ "\n",
+ "We choose to use LlamaIndex's `SentenceSplitter` in this case in order to get these chunks. We must pass a tokenization callable, which we can do using the `transformers` library.\n",
+ "\n",
+ "You may also apply further transformations from the LlamaIndex repo if you so choose. Take a look at the [docs](https://docs.llamaindex.ai/en/stable/understanding/loading/loading.html) for inspiration on what is possible with transformations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 552,
+ "referenced_widgets": [
+ "e2146d738e0d4fe39af19bfb22da2584",
+ "8670211888514256a54240d002135917",
+ "88b9a3c1bc78462ea149b94d1ca08e59",
+ "5a94eb68326b4466b7c19ad67f8f5ba6",
+ "ab441cef6118450f9bfbcc29f8f34d4f",
+ "016a793bd7684acc9733f1e3160bd4d6",
+ "c8330e6698cd41868efc23be3962db7d",
+ "7e9939b2a8394aa3ae4dd1a90d9517ce",
+ "24055747d0e44bf29f80017117e332cc",
+ "59f4d94a1be24ef7a1366b7220a9c9de",
+ "6ba07312eb854ae9a88e6cfb886b65c3",
+ "01d81bc2d81741e6bb0a82ba78dc2b21",
+ "76d51ab196f54b398dcdc574968331fa",
+ "b9f6e9e50e224cf89b902e419dae269a",
+ "1271483cc9da41b28637dce1a40fa538",
+ "78a1cb4ec6cc4044b2149cb84f34979c",
+ "403ebbf8e6da4c188c42e83ee01db07b",
+ "f8e3c447794448529391f9660d419632",
+ "a7b1f4318e3242e591b15147d699c656",
+ "7d7dd120240043c1aec77af517bf7add",
+ "204957d62b7a4f7db7fa195a3d042b42",
+ "930d287b503b4eeeb527c6facc97ca30",
+ "88fc0e784a71447abbba8273f6fcdace",
+ "bb120e29c7fb49e4823daf66f048e95b",
+ "012491a19a8143f2adc1a95c5d20e488",
+ "a9cd4375dfd94af4ab2c004e9dbe6fa7",
+ "21229dbb1f414913b8ad230648ab76e9",
+ "d102def58f554d48890008185d17af96",
+ "79a852590a0444bebad62f2c679e72c6",
+ "c0c90d8f84b64a11a48e81ba8dec7044",
+ "1167e72dd2184c6ca8c670b27c73a27d",
+ "af93bfd421504e3eada6c8c884869e2f",
+ "782b11c627be41b2b67e2774ee7fcf0b",
+ "159f81e1ea4c40a394b6d796527c7c4a",
+ "96997c9c8f3a4237b1d025252c3c2358",
+ "28b25e6395c54e8e9494f403953065bb",
+ "cf03a6a84e1345aab6d2d78f9ae5f34a",
+ "5a15923788d74d98a70646e1e921831e",
+ "81d523a8f29c491aaed45a199260b414",
+ "56cd150b53234f53b28dec80ebeb33a8",
+ "21c71aa09fae458fa483d58019506d46",
+ "b3cb567ec4934a6d935de606af921a61",
+ "af63d2b1e22c40629b2fe585813a2bf4",
+ "87d5a00cc4a2460e836ddd22cff918dd",
+ "a65823cef8a648cebad7e61f012de1f1",
+ "6643b53bdbe74a6491a3db6ec06446e9",
+ "93d9e20a78774bb485ceca31d4453204",
+ "41b58953474f49f0915d14801fb18174",
+ "17e8c3567f1a4d6aaa485fd1014977ae",
+ "0e300e8dbea143d688b08c32883d2d29",
+ "2e094e1165554834a0cc19ebffe93311",
+ "2a9c8f7bb54e4814825bddcbb10b3482",
+ "5f5e73e41dae4625a98a6eee6120a7d4",
+ "efb2bf269c1d4ace81ff4a6fabbfb3b6",
+ "fc96c077774241a58d24ac42dd52df7e"
+ ]
+ },
+ "id": "p_1mXiVZBZu2",
+ "outputId": "f0146d70-4a6f-4821-ed70-11200a6efd49"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e2146d738e0d4fe39af19bfb22da2584",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/7.92k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "01d81bc2d81741e6bb0a82ba78dc2b21",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenization_cohere_fast.py: 0%| | 0.00/43.7k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "88fc0e784a71447abbba8273f6fcdace",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "configuration_cohere.py: 0%| | 0.00/7.37k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "A new version of the following files was downloaded from https://huggingface.co/CohereForAI/c4ai-command-r-v01:\n",
+ "- configuration_cohere.py\n",
+ ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
+ "A new version of the following files was downloaded from https://huggingface.co/CohereForAI/c4ai-command-r-v01:\n",
+ "- tokenization_cohere_fast.py\n",
+ "- configuration_cohere.py\n",
+ ". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "159f81e1ea4c40a394b6d796527c7c4a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/12.8M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a65823cef8a648cebad7e61f012de1f1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/429 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llama_index.core.ingestion import IngestionPipeline\n",
+ "from llama_index.core.node_parser import SentenceSplitter\n",
+ "\n",
+ "from transformers import AutoTokenizer\n",
+ "\n",
+ "model_id = \"CohereForAI/c4ai-command-r-v01\"\n",
+ "tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)\n",
+ "\n",
+ "\n",
+ "# TODO: replace with a HF implementation so this is much faster. We'll\n",
+ "# presumably release it when we OS the model\n",
+ "def tokenizer_fn(x):\n",
+ "    \"\"\"Return the token ids of `x`; an empty input yields an empty list.\"\"\"\n",
+ "    if len(x) > 0:\n",
+ "        return tokenizer(x).input_ids\n",
+ "    return []\n",
+ "\n",
+ "\n",
+ "# Split the text into chunks of at most 512 tokens, with no overlap between chunks\n",
+ "pipeline = IngestionPipeline(\n",
+ "    transformations=[SentenceSplitter(chunk_size=512, chunk_overlap=0, tokenizer=tokenizer_fn)]\n",
+ ")\n",
+ "\n",
+ "# Run the pipeline to transform the text\n",
+ "nodes = pipeline.run(nodes=nodes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "573Sl4Ijfura"
+ },
+ "source": [
+ "## **Step 2: Load document into a LlamaIndex vector store**\n",
+ "\n",
+ "Loading the document into a LlamaIndex vector store will allow us to use the Cohere embedding model and rerank model to retrieve the relevant parts of the form to pass into Command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "T3-T32hQ-cYl",
+ "outputId": "855d765f-8d45-4a99-ab76-0f30d8ada3e0"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from llama_index.core import Settings, VectorStoreIndex\n",
+ "\n",
+ "from llama_index.postprocessor.cohere_rerank import CohereRerank\n",
+ "\n",
+ "from llama_index.embeddings.cohere import CohereEmbedding\n",
+ "\n",
+ "# Instantiate the embedding model\n",
+ "embed_model = CohereEmbedding(cohere_api_key=COHERE_API_KEY)\n",
+ "\n",
+ "# Global settings\n",
+ "Settings.chunk_size = 512\n",
+ "Settings.embed_model = embed_model\n",
+ "\n",
+ "# Create the vector store\n",
+ "index = VectorStoreIndex(nodes)\n",
+ "\n",
+ "retriever = index.as_retriever(similarity_top_k=30) # Change to whatever top_k you want\n",
+ "\n",
+ "# Instantiate the reranker\n",
+ "rerank = CohereRerank(api_key=COHERE_API_KEY, top_n=15)\n",
+ "\n",
+ "\n",
+ "def retrieve(query):\n",
+ "    \"\"\"Return the nodes for `query`: similarity search with Cohere embeddings, then Cohere Rerank.\"\"\"\n",
+ "    return rerank.postprocess_nodes(retriever.retrieve(query), query_str=query)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "19QPS8pzQicf"
+ },
+ "source": [
+ "## **Step 3: Query generation and retrieval**\n",
+ "\n",
+ "In order to do RAG, we need a query or a set of queries to actually _do_ the retrieval step. As is standard in RAG settings, we'll use Command to generate those queries for us. Then, we'll use those queries along with the LlamaIndex retriever we built earlier to retrieve the most relevant pieces of the 10-K.\n",
+ "\n",
+ "To learn more about document mode and query generation, check out [our documentation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "XHCPxdvrFliD",
+ "outputId": "85cbbe95-cb88-49ec-d4ea-f7b5a15e1816"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "PROMPT = \"List the overall revenue numbers for 2021, 2022, and 2023 in the 10-K as bullet points, then explain the revenue growth trends.\"\n",
+ "\n",
+ "# Get queries to run against our index from the command-r model\n",
+ "r = co.chat(PROMPT, model=\"command-r\", search_queries_only=True)\n",
+ "# Always bind `queries`, so the retrieval cell cannot hit a NameError or silently reuse stale queries\n",
+ "queries = [q[\"text\"] for q in (r.search_queries or [])]\n",
+ "if not queries:\n",
+ "    print(\"No queries returned by the model\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nyLVmUBART3U"
+ },
+ "source": [
+ "Now, with the queries in hand, we search against our vector index."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "k4t8s4xUX51B",
+ "outputId": "e739b4a6-b7b4-4870-8d45-2c921b13bcf1"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Convenience function for formatting documents\n",
+ "def format_for_cohere_client(nodes_):\n",
+ "    \"\"\"Turn retrieved nodes into the list of dicts passed to the Cohere client as `documents`.\"\"\"\n",
+ "    formatted = []\n",
+ "    for item in nodes_:\n",
+ "        inner = item.node\n",
+ "        formatted.append({\"text\": inner.text, \"llamaindex_id\": inner.id_})\n",
+ "    return formatted\n",
+ "\n",
+ "\n",
+ "documents = []\n",
+ "# Retrieve a set of chunks from the vector index and append them to the list of\n",
+ "# documents that should be included in the final RAG step\n",
+ "for query in queries:\n",
+ "    ret_nodes = retrieve(query)\n",
+ "    documents.extend(format_for_cohere_client(ret_nodes))\n",
+ "\n",
+ "# One final deduplication step in case multiple queries return the same chunk.\n",
+ "# dict.fromkeys keeps the first-seen order, so the `doc_<i>` ids are the same on every run.\n",
+ "unique_chunks = dict.fromkeys(tuple(d.items()) for d in documents)\n",
+ "documents = [dict(chunk, id=f\"doc_{i}\") for i, chunk in enumerate(unique_chunks)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "kJAlJgocRxBI"
+ },
+ "source": [
+ "## **Step 4: Make a RAG request to Command using document mode**\n",
+ "\n",
+ "Now that we have our nicely formatted chunks from the 10-K, we can pass them directly into Command using the Cohere SDK. By passing the chunks into the `documents` kwarg, we enable document mode, which will perform grounded inference on the documents you pass in.\n",
+ "\n",
+ "You can see this for yourself by inspecting the `response.citations` field to check where the model is citing from.\n",
+ "\n",
+ "You can learn more about the `chat` endpoint by checking out the API reference [here](https://docs.cohere.com/reference/chat)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 267
+ },
+ "id": "NBfQoZcXYdFc",
+ "outputId": "b3a4156b-2749-4aaa-8b07-7c331388183f"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here are the overall revenue numbers for the years 2021, 2022, and 2023 as bullet points:\n",
+ "- 2021: $5,992 million\n",
+ "- 2022: $8,399 million\n",
+ "- 2023: $9,917 million\n",
+ "\n",
+ "Revenue increased by 18% in 2023 compared to 2022, primarily due to a 14% increase in Nights and Experiences Booked, which reached 54.5 million. This, combined with higher average daily rates, resulted in a 16% increase in Gross Booking Value, which reached $10.0 billion. \n",
+ "\n",
+ "The revenue growth trend demonstrates sustained strong travel demand. On a constant-currency basis, revenue increased by 17% in 2023 compared to the previous year.\n",
+ "\n",
+ "Other factors influencing the company's financial performance are described outside of the revenue growth trends.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Make a request to the model; passing the retrieved chunks via `documents` enables document mode\n",
+ "response = co.chat(\n",
+ " message=PROMPT,\n",
+ " model=\"command-r\",\n",
+ " temperature=0.3,\n",
+ " documents=documents,\n",
+ " prompt_truncation=\"AUTO\"\n",
+ ")\n",
+ "\n",
+ "print(response.text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 267
+ },
+ "id": "LHi1PDFpWj5p",
+ "outputId": "1d735808-b758-4535-a419-df91225b9bfb"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Here are the overall revenue numbers for the years 2021, 2022, and 2023 as bullet points:\n",
+ "- 2021: $5,992 million [13]\n",
+ "- 2022: $8,399 million [13]\n",
+ "- 2023: $9,917 million [13]\n",
+ "\n",
+ "Revenue increased by 18% in 2023 [11] compared to 2022, primarily due to a 14% increase in Nights and Experiences Booked [11], which reached 54.5 million. [11] This, combined with higher average daily rates [11], resulted in a 16% increase in Gross Booking Value [11], which reached $10.0 billion. [11] \n",
+ "\n",
+ "The revenue growth trend demonstrates sustained strong travel demand. [11] On a constant-currency basis [11], revenue increased by 17% in 2023 [11] compared to the previous year.\n",
+ "\n",
+ "Other factors [8, 14] influencing the company's financial performance are described outside of the revenue growth trends. [8, 14]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Helper function for displaying response WITH citations\n",
+ "def insert_citations(text: str, citations: list[dict]):\n",
+ "    \"\"\"\n",
+ "    Return `text` with a bracketed list of document numbers after each cited span.\n",
+ "\n",
+ "    Args:\n",
+ "        text: The generated answer.\n",
+ "        citations: Citation dicts with `start`/`end` character offsets into `text` and a\n",
+ "            `document_ids` list whose entries look like `doc_<n>`. `None` or an empty\n",
+ "            list returns `text` unchanged.\n",
+ "    \"\"\"\n",
+ "    if not citations:\n",
+ "        return text\n",
+ "\n",
+ "    offset = 0\n",
+ "    # The running offset is only valid when the spans are visited from left to right\n",
+ "    for citation in sorted(citations, key=lambda c: c['start']):\n",
+ "        # Adjust start/end with offset\n",
+ "        start, end = citation['start'] + offset, citation['end'] + offset\n",
+ "        # doc[4:] removes the 'doc_' prefix and leaves just the document number\n",
+ "        cited_docs = [doc[4:] for doc in citation[\"document_ids\"]]\n",
+ "        # Shorten citations if they're too long for convenience\n",
+ "        if len(cited_docs) > 3:\n",
+ "            placeholder = \"[\" + \", \".join(cited_docs[:3]) + \"...]\"\n",
+ "        else:\n",
+ "            placeholder = \"[\" + \", \".join(cited_docs) + \"]\"\n",
+ "        # Replace the cited text with itself followed by the placeholder\n",
+ "        modification = f'{text[start:end]} {placeholder}'\n",
+ "        text = text[:start] + modification + text[end:]\n",
+ "        # Update the offset for subsequent replacements\n",
+ "        offset += len(modification) - (end - start)\n",
+ "\n",
+ "    return text\n",
+ "\n",
+ "print(insert_citations(response.text, response.citations))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "aI4mJqMMKE3N"
+ },
+ "source": [
+ "# **Appendix**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yOeXqm1-6vXh"
+ },
+ "source": [
+ "## PDF to Text using OCR and `pdf2image`\n",
+ "\n",
+ "This method will be required for any PDFs you have that need to be converted to text.\n",
+ "\n",
+ "**WARNING**: this process can take a long time without the proper optimizations. We have provided a snippet for your use below, but use at your own risk."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hv91DjU77a3Q"
+ },
+ "source": [
+ "To go from PDF to text with PyTesseract, there is an intermediary step of converting the PDF to an image first, then passing that image into the OCR package, as OCR is usually only available for images.\n",
+ "\n",
+ "To do this, we use `pdf2image`, which uses `poppler` behind the scenes to convert each page of the PDF into an image. From there, we can pass each image (which is a PIL Image object) directly into the OCR tool."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "bK8sMxiTeRTB"
+ },
+ "outputs": [],
+ "source": [
+ "import pytesseract\n",
+ "from pdf2image import convert_from_path\n",
+ "\n",
+ "# TODO: host this PDF somewhere\n",
+ "PDF_PATH = \"/content/uber_10k.pdf\"\n",
+ "\n",
+ "# pdf2image extracts as a list of PIL.Image objects, one per page\n",
+ "page_images = convert_from_path(PDF_PATH)\n",
+ "\n",
+ "# Run OCR on every page; `pages` ends up holding the extracted text of each page\n",
+ "pages = [pytesseract.image_to_string(image) for image in page_images]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7w7HyFL9iaZI"
+ },
+ "source": [
+ "## Token count / price comparison and latency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "igUBVGF0cy0D",
+ "outputId": "73c4d4d5-f29c-4403-8b08-c2567c39b902"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def get_response(prompt, rag):\n",
+ "    \"\"\"\n",
+ "    Send `prompt` to the command-r model, optionally grounding the answer in retrieved chunks.\n",
+ "\n",
+ "    Args:\n",
+ "        prompt: The message to send to the model.\n",
+ "        rag: If truthy, generate search queries for the prompt, fetch matching chunks with\n",
+ "            `retrieve`, and pass them to the model via `documents`. Otherwise the prompt is\n",
+ "            sent on its own.\n",
+ "\n",
+ "    Returns:\n",
+ "        The response object returned by `co.chat`.\n",
+ "    \"\"\"\n",
+ "    if rag:\n",
+ "        # Get queries to run against our index from the command-r model\n",
+ "        r = co.chat(prompt, model=\"command-r\", search_queries_only=True)\n",
+ "        # Always bind `queries`: without this, a response with no search queries raised UnboundLocalError\n",
+ "        queries = [q[\"text\"] for q in (r.search_queries or [])]\n",
+ "        if not queries:\n",
+ "            print(\"No queries returned by the model\")\n",
+ "\n",
+ "        documents = []\n",
+ "        # Retrieve a set of chunks from the vector index and append them to the list of\n",
+ "        # documents that should be included in the final RAG step\n",
+ "        for query in queries:\n",
+ "            ret_nodes = retrieve(query)\n",
+ "            documents.extend(format_for_cohere_client(ret_nodes))\n",
+ "\n",
+ "        # One final deduplication step in case multiple queries return the same chunk\n",
+ "        # (dict.fromkeys keeps the first-seen order, so the request is reproducible)\n",
+ "        documents = [dict(t) for t in dict.fromkeys(tuple(d.items()) for d in documents)]\n",
+ "\n",
+ "        # Make a request to the model\n",
+ "        response = co.chat(\n",
+ "            message=prompt,\n",
+ "            model=\"command-r\",\n",
+ "            temperature=0.3,\n",
+ "            documents=documents,\n",
+ "            prompt_truncation=\"AUTO\"\n",
+ "        )\n",
+ "    else:\n",
+ "        response = co.chat(\n",
+ "            message=prompt,\n",
+ "            model=\"command-r\",\n",
+ "            temperature=0.3,\n",
+ "        )\n",
+ "\n",
+ "    return response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "lX0YByK2eIeF",
+ "outputId": "a29cdf89-8b88-4c37-901c-6b1ce88cfa5e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Template that puts the full text of the filing ahead of the question (no retrieval involved)\n",
+ "prompt_template = \"\"\"# financial form 10-K\n",
+ "{tenk}\n",
+ "\n",
+ "# question\n",
+ "{question}\"\"\"\n",
+ "\n",
+ "full_context_prompt = prompt_template.format(tenk=edgar_10k, question=PROMPT)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "_s_9t57wfRCy",
+ "outputId": "babf0cb3-a719-467d-ec5f-80e8477f2e40"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# r1: answer produced with retrieval (RAG); r2: answer produced with the whole filing in the prompt\n",
+ "r1 = get_response(PROMPT, rag=True)\n",
+ "r2 = get_response(full_context_prompt, rag=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "INxf-xvdiOgF",
+ "outputId": "8632e4ce-e945-44d6-ed8d-7e6a4b923348"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def get_price(r):\n",
+ "    \"\"\"\n",
+ "    Estimate the cost of one chat response from its prompt and response token counts.\n",
+ "\n",
+ "    NOTE(review): the 0.5 / 1.5 rates are taken to be USD per 1M tokens - confirm against\n",
+ "    current Cohere pricing.\n",
+ "    \"\"\"\n",
+ "    # The previous divisor `10e6` is 10,000,000, which understated every price tenfold\n",
+ "    tokens_per_rate_unit = 1_000_000\n",
+ "    prompt_cost = r.token_count[\"prompt_tokens\"] * 0.5 / tokens_per_rate_unit\n",
+ "    response_cost = r.token_count[\"response_tokens\"] * 1.5 / tokens_per_rate_unit\n",
+ "    return prompt_cost + response_cost"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "uuUUZeewiSV0",
+ "outputId": "991125f1-5892-4744-e919-7b46930a0272"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RAG is 93% cheaper than full context\n"
+ ]
+ }
+ ],
+ "source": [
+ "rag_price = get_price(r1)\n",
+ "full_context_price = get_price(r2)\n",
+ "\n",
+ "# Saving from RAG, expressed as a fraction of the full-context price\n",
+ "print(f\"RAG is {(full_context_price - rag_price) / full_context_price:.0%} cheaper than full context\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "8sqRqK8ekKAH",
+ "outputId": "4d44b64c-50cc-42f0-e2f1-c38205e42c04"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "14.9 s ± 1.4 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Time the RAG path of `get_response`\n",
+ "%timeit get_response(PROMPT, rag=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "r9Ck-k_gCNJ1",
+ "outputId": "fe4cdd1a-a4ac-4e20-8c8d-a3ef05ab8bcc"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "22.7 s ± 7.43 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Time the full-context path of `get_response` (the whole filing is sent in the prompt)\n",
+ "%timeit get_response(full_context_prompt, rag=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Y7sLarNgFGlV"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "012491a19a8143f2adc1a95c5d20e488": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c0c90d8f84b64a11a48e81ba8dec7044",
+ "max": 7366,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1167e72dd2184c6ca8c670b27c73a27d",
+ "value": 7366
+ }
+ },
+ "016a793bd7684acc9733f1e3160bd4d6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "01d81bc2d81741e6bb0a82ba78dc2b21": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_76d51ab196f54b398dcdc574968331fa",
+ "IPY_MODEL_b9f6e9e50e224cf89b902e419dae269a",
+ "IPY_MODEL_1271483cc9da41b28637dce1a40fa538"
+ ],
+ "layout": "IPY_MODEL_78a1cb4ec6cc4044b2149cb84f34979c"
+ }
+ },
+ "0e300e8dbea143d688b08c32883d2d29": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1167e72dd2184c6ca8c670b27c73a27d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "1271483cc9da41b28637dce1a40fa538": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_204957d62b7a4f7db7fa195a3d042b42",
+ "placeholder": "",
+ "style": "IPY_MODEL_930d287b503b4eeeb527c6facc97ca30",
+ "value": " 43.7k/43.7k [00:00<00:00, 1.97MB/s]"
+ }
+ },
+ "159f81e1ea4c40a394b6d796527c7c4a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_96997c9c8f3a4237b1d025252c3c2358",
+ "IPY_MODEL_28b25e6395c54e8e9494f403953065bb",
+ "IPY_MODEL_cf03a6a84e1345aab6d2d78f9ae5f34a"
+ ],
+ "layout": "IPY_MODEL_5a15923788d74d98a70646e1e921831e"
+ }
+ },
+ "17e8c3567f1a4d6aaa485fd1014977ae": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "204957d62b7a4f7db7fa195a3d042b42": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "21229dbb1f414913b8ad230648ab76e9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "21c71aa09fae458fa483d58019506d46": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "24055747d0e44bf29f80017117e332cc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "28b25e6395c54e8e9494f403953065bb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_21c71aa09fae458fa483d58019506d46",
+ "max": 12777406,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b3cb567ec4934a6d935de606af921a61",
+ "value": 12777406
+ }
+ },
+ "2a9c8f7bb54e4814825bddcbb10b3482": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e094e1165554834a0cc19ebffe93311": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "403ebbf8e6da4c188c42e83ee01db07b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "41b58953474f49f0915d14801fb18174": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_efb2bf269c1d4ace81ff4a6fabbfb3b6",
+ "placeholder": "",
+ "style": "IPY_MODEL_fc96c077774241a58d24ac42dd52df7e",
+ "value": " 429/429 [00:00<00:00, 20.7kB/s]"
+ }
+ },
+ "56cd150b53234f53b28dec80ebeb33a8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "59f4d94a1be24ef7a1366b7220a9c9de": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5a15923788d74d98a70646e1e921831e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5a94eb68326b4466b7c19ad67f8f5ba6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_59f4d94a1be24ef7a1366b7220a9c9de",
+ "placeholder": "",
+ "style": "IPY_MODEL_6ba07312eb854ae9a88e6cfb886b65c3",
+ "value": " 7.92k/7.92k [00:00<00:00, 366kB/s]"
+ }
+ },
+ "5f5e73e41dae4625a98a6eee6120a7d4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "6643b53bdbe74a6491a3db6ec06446e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0e300e8dbea143d688b08c32883d2d29",
+ "placeholder": "",
+ "style": "IPY_MODEL_2e094e1165554834a0cc19ebffe93311",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "6ba07312eb854ae9a88e6cfb886b65c3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "76d51ab196f54b398dcdc574968331fa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_403ebbf8e6da4c188c42e83ee01db07b",
+ "placeholder": "",
+ "style": "IPY_MODEL_f8e3c447794448529391f9660d419632",
+ "value": "tokenization_cohere_fast.py: 100%"
+ }
+ },
+ "782b11c627be41b2b67e2774ee7fcf0b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "78a1cb4ec6cc4044b2149cb84f34979c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "79a852590a0444bebad62f2c679e72c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7d7dd120240043c1aec77af517bf7add": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "7e9939b2a8394aa3ae4dd1a90d9517ce": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "81d523a8f29c491aaed45a199260b414": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8670211888514256a54240d002135917": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_016a793bd7684acc9733f1e3160bd4d6",
+ "placeholder": "",
+ "style": "IPY_MODEL_c8330e6698cd41868efc23be3962db7d",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "87d5a00cc4a2460e836ddd22cff918dd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "88b9a3c1bc78462ea149b94d1ca08e59": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7e9939b2a8394aa3ae4dd1a90d9517ce",
+ "max": 7916,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_24055747d0e44bf29f80017117e332cc",
+ "value": 7916
+ }
+ },
+ "88fc0e784a71447abbba8273f6fcdace": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_bb120e29c7fb49e4823daf66f048e95b",
+ "IPY_MODEL_012491a19a8143f2adc1a95c5d20e488",
+ "IPY_MODEL_a9cd4375dfd94af4ab2c004e9dbe6fa7"
+ ],
+ "layout": "IPY_MODEL_21229dbb1f414913b8ad230648ab76e9"
+ }
+ },
+ "930d287b503b4eeeb527c6facc97ca30": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "93d9e20a78774bb485ceca31d4453204": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2a9c8f7bb54e4814825bddcbb10b3482",
+ "max": 429,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5f5e73e41dae4625a98a6eee6120a7d4",
+ "value": 429
+ }
+ },
+ "96997c9c8f3a4237b1d025252c3c2358": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_81d523a8f29c491aaed45a199260b414",
+ "placeholder": "",
+ "style": "IPY_MODEL_56cd150b53234f53b28dec80ebeb33a8",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "a65823cef8a648cebad7e61f012de1f1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6643b53bdbe74a6491a3db6ec06446e9",
+ "IPY_MODEL_93d9e20a78774bb485ceca31d4453204",
+ "IPY_MODEL_41b58953474f49f0915d14801fb18174"
+ ],
+ "layout": "IPY_MODEL_17e8c3567f1a4d6aaa485fd1014977ae"
+ }
+ },
+ "a7b1f4318e3242e591b15147d699c656": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a9cd4375dfd94af4ab2c004e9dbe6fa7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_af93bfd421504e3eada6c8c884869e2f",
+ "placeholder": "",
+ "style": "IPY_MODEL_782b11c627be41b2b67e2774ee7fcf0b",
+ "value": " 7.37k/7.37k [00:00<00:00, 373kB/s]"
+ }
+ },
+ "ab441cef6118450f9bfbcc29f8f34d4f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "af63d2b1e22c40629b2fe585813a2bf4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "af93bfd421504e3eada6c8c884869e2f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b3cb567ec4934a6d935de606af921a61": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b9f6e9e50e224cf89b902e419dae269a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a7b1f4318e3242e591b15147d699c656",
+ "max": 43727,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_7d7dd120240043c1aec77af517bf7add",
+ "value": 43727
+ }
+ },
+ "bb120e29c7fb49e4823daf66f048e95b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d102def58f554d48890008185d17af96",
+ "placeholder": "",
+ "style": "IPY_MODEL_79a852590a0444bebad62f2c679e72c6",
+ "value": "configuration_cohere.py: 100%"
+ }
+ },
+ "c0c90d8f84b64a11a48e81ba8dec7044": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c8330e6698cd41868efc23be3962db7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "cf03a6a84e1345aab6d2d78f9ae5f34a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_af63d2b1e22c40629b2fe585813a2bf4",
+ "placeholder": "",
+ "style": "IPY_MODEL_87d5a00cc4a2460e836ddd22cff918dd",
+ "value": " 12.8M/12.8M [00:00<00:00, 51.7MB/s]"
+ }
+ },
+ "d102def58f554d48890008185d17af96": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e2146d738e0d4fe39af19bfb22da2584": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_8670211888514256a54240d002135917",
+ "IPY_MODEL_88b9a3c1bc78462ea149b94d1ca08e59",
+ "IPY_MODEL_5a94eb68326b4466b7c19ad67f8f5ba6"
+ ],
+ "layout": "IPY_MODEL_ab441cef6118450f9bfbcc29f8f34d4f"
+ }
+ },
+ "efb2bf269c1d4ace81ff4a6fabbfb3b6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f8e3c447794448529391f9660d419632": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fc96c077774241a58d24ac42dd52df7e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}