Merge pull request #113 from cohere-ai/chat-rag-upd

Update chat rag notebooks
cohere-ai · Jan 30, 2024 · 0b40861 · 0b40861
2 parents 07a6135 + 09c9854
commit 0b40861
Show file tree

Hide file tree

Showing 3 changed files with 161 additions and 121 deletions.
diff --git a/examples/chat_rag_connector/RAG_Chatbot_with_Connectors.ipynb b/examples/chat_rag_connector/RAG_Chatbot_with_Connectors.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -11,8 +11,7 @@
     "import uuid\n",
     "from typing import List, Dict\n",
     "\n",
-    "COHERE_API_KEY = os.getenv(\"COHERE_API_KEY\")\n",
-    "co = cohere.Client(COHERE_API_KEY)"
+    "co = cohere.Client(os.getenv(\"COHERE_API_KEY\"))  # read the key from the environment; never hardcode or commit it"
    ]
   },
   {
@@ -79,7 +78,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -98,7 +97,8 @@
     "                        )\n",
     "\n",
     "        for event in response:\n",
-    "                yield event"
+    "            yield event\n",
+    "        yield response"
    ]
   },
   {
@@ -110,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -136,31 +136,33 @@
     "            # Print the chatbot response\n",
     "            print(\"Chatbot:\")\n",
     "            \n",
-    "            documents = []\n",
-    "            documents_flag = False\n",
     "            citations_flag = False\n",
     "            \n",
     "            for event in response:\n",
-    "                # Documents\n",
-    "                if event.event_type == \"search-results\":\n",
-    "                    documents_flag = True\n",
-    "                    documents = event.documents\n",
-    "                    \n",
+    "                stream_type = type(event).__name__\n",
+    "                \n",
     "                # Text\n",
-    "                if event.event_type == \"text-generation\":\n",
-    "                    print(event.text, end=\"\")        \n",
+    "                if stream_type == \"StreamTextGeneration\":\n",
+    "                    print(event.text, end=\"\")\n",
     "\n",
     "                # Citations\n",
-    "                if event.event_type == \"citation-generation\":\n",
+    "                if stream_type == \"StreamCitationGeneration\":\n",
     "                    if not citations_flag:\n",
     "                        print(\"\\n\\nCITATIONS:\")\n",
     "                        citations_flag = True\n",
-    "                    print(event.citations)\n",
-    "            \n",
-    "            if documents_flag:\n",
-    "                print(\"\\n\\nDOCUMENTS:\")\n",
-    "                for d in documents:\n",
-    "                    print(f'{d[\"title\"]} ({d[\"id\"]}). URL: {d[\"url\"]}')\n",
+    "                    print(event.citations[0])\n",
+    "                \n",
+    "                # Documents\n",
+    "                if citations_flag:\n",
+    "                    if stream_type == \"StreamingChat\":\n",
+    "                        print(\"\\n\\nDOCUMENTS:\")\n",
+    "                        documents = [{'id': doc['id'],\n",
+    "                                      'text': doc['text'][:50] + '...',\n",
+    "                                      'title': doc['title'],\n",
+    "                                      'url': doc['url']} \n",
+    "                                      for doc in event.documents]\n",
+    "                        for doc in documents:\n",
+    "                            print(doc)\n",
     "\n",
     "            print(f\"\\n{'-'*100}\\n\")"
    ]
@@ -174,30 +176,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "User: What is attention\n",
+      "User: What are sentence embeddings\n",
+      "Chatbot:\n",
+      "Sentence embeddings are the building blocks of language models. They associate each sentence with a vector (list of numbers) in a way that similar sentences are assigned similar vectors. These vectors are composed of numbers and carry important properties of the sentence. The embeddings act as a form of translation between languages as well, as they provide a relatable vector for similar sentences in different languages.\n",
+      "\n",
+      "CITATIONS:\n",
+      "{'start': 69, 'end': 124, 'text': 'associate each sentence with a vector (list of numbers)', 'document_ids': ['demo-conn-e5y5ps_0', 'demo-conn-e5y5ps_1', 'demo-conn-e5y5ps_2']}\n",
+      "{'start': 139, 'end': 186, 'text': 'similar sentences are assigned similar vectors.', 'document_ids': ['demo-conn-e5y5ps_0', 'demo-conn-e5y5ps_1']}\n",
+      "{'start': 235, 'end': 272, 'text': 'important properties of the sentence.', 'document_ids': ['demo-conn-e5y5ps_1', 'demo-conn-e5y5ps_2']}\n",
+      "\n",
+      "\n",
+      "DOCUMENTS:\n",
+      "{'id': 'demo-conn-e5y5ps_0', 'text': 'In the previous chapter, we learned that sentence ...', 'title': 'Similarity Between Words and Sentences', 'url': 'https://docs.cohere.com/docs/similarity-between-words-and-sentences'}\n",
+      "{'id': 'demo-conn-e5y5ps_1', 'text': 'This is where sentence embeddings come into play. ...', 'title': 'Text Embeddings', 'url': 'https://docs.cohere.com/docs/text-embeddings'}\n",
+      "{'id': 'demo-conn-e5y5ps_2', 'text': 'Sentence embeddings are even more powerful, as the...', 'title': 'Similarity Between Words and Sentences', 'url': 'https://docs.cohere.com/docs/similarity-between-words-and-sentences'}\n",
+      "\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "\n",
+      "User: How is it different from word embeddings\n",
       "Chatbot:\n",
-      "Attention is a technique used in language models to provide context to each word in a sentence or text, based on the other words. Attention plays a crucial role in transformer models, which can help improve large language models.\n",
+      "The primary distinction between word embeddings and sentence embeddings is that the latter assigns a vector to every sentence whereas the former does the same thing but for individual words. \n",
+      "\n",
+      "Both embeddings are similar in the sense that they associate vectors in a way that similar items (words or sentences) are mapped to similar vectors. Word embeddings are a subset of sentence embeddings.\n",
       "\n",
       "CITATIONS:\n",
-      "[{'start': 60, 'end': 67, 'text': 'context', 'document_ids': ['demo-conn-tm17qr_0', 'demo-conn-tm17qr_1', 'demo-conn-tm17qr_2']}]\n",
-      "[{'start': 68, 'end': 102, 'text': 'to each word in a sentence or text', 'document_ids': ['demo-conn-tm17qr_1', 'demo-conn-tm17qr_2']}]\n",
-      "[{'start': 117, 'end': 129, 'text': 'other words.', 'document_ids': ['demo-conn-tm17qr_1']}]\n",
-      "[{'start': 148, 'end': 160, 'text': 'crucial role', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
-      "[{'start': 164, 'end': 182, 'text': 'transformer models', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
-      "[{'start': 199, 'end': 229, 'text': 'improve large language models.', 'document_ids': ['demo-conn-tm17qr_2']}]\n",
+      "{'start': 91, 'end': 125, 'text': 'assigns a vector to every sentence', 'document_ids': ['demo-conn-e5y5ps_0', 'demo-conn-e5y5ps_1']}\n",
+      "{'start': 165, 'end': 190, 'text': 'but for individual words.', 'document_ids': ['demo-conn-e5y5ps_0']}\n",
+      "{'start': 244, 'end': 261, 'text': 'associate vectors', 'document_ids': ['demo-conn-e5y5ps_0', 'demo-conn-e5y5ps_1']}\n",
+      "{'start': 315, 'end': 341, 'text': 'mapped to similar vectors.', 'document_ids': ['demo-conn-e5y5ps_0', 'demo-conn-e5y5ps_1']}\n",
+      "{'start': 342, 'end': 394, 'text': 'Word embeddings are a subset of sentence embeddings.', 'document_ids': ['demo-conn-e5y5ps_1']}\n",
       "\n",
       "\n",
       "DOCUMENTS:\n",
-      "Transformer Models (demo-conn-tm17qr_0). URL: https://docs.cohere.com/docs/transformer-models\n",
-      "Transformer Models (demo-conn-tm17qr_1). URL: https://docs.cohere.com/docs/transformer-models\n",
-      "Transformer Models (demo-conn-tm17qr_2). URL: https://docs.cohere.com/docs/transformer-models\n",
+      "{'id': 'demo-conn-e5y5ps_0', 'text': 'In the previous chapters, you learned about word a...', 'title': 'The Attention Mechanism', 'url': 'https://docs.cohere.com/docs/the-attention-mechanism'}\n",
+      "{'id': 'demo-conn-e5y5ps_1', 'text': 'This is where sentence embeddings come into play. ...', 'title': 'Text Embeddings', 'url': 'https://docs.cohere.com/docs/text-embeddings'}\n",
       "\n",
       "----------------------------------------------------------------------------------------------------\n",
       "\n",
@@ -207,7 +226,7 @@
    ],
    "source": [
     "# Define connectors\n",
-    "connectors = [\"demo-conn-tm17qr\"]\n",
+    "connectors = [\"demo-conn-e5y5ps\"]\n",
     "\n",
     "# Create an instance of the Chatbot class by supplying the connectors\n",
     "chatbot = Chatbot(connectors)\n",