Skip to content

Commit

Permalink
docs: Updated docstrings
Browse files Browse the repository at this point in the history
- Updated examples to include JS
- Changed examples to use nomic-embed-text instead of llama2
  • Loading branch information
tazarov committed Mar 25, 2024
1 parent 2cac735 commit 0c82ac4
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 27 deletions.
2 changes: 1 addition & 1 deletion chromadb/test/ef/test_ollama_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_ollama() -> None:
except (HTTPError, ConnectionError):
pytest.skip("Ollama server not running. Skipping test.")
ef = OllamaEmbeddingFunction(
model_name=os.environ.get("OLLAMA_MODEL") or "llama2",
model_name=os.environ.get("OLLAMA_MODEL") or "nomic-embed-text",
url=f"{os.environ.get('OLLAMA_SERVER_URL')}/embeddings",
)
embeddings = ef(["Here is an article about llamas...", "this is another article"])
Expand Down
23 changes: 13 additions & 10 deletions chromadb/utils/embedding_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(
model_name: str = "all-MiniLM-L6-v2",
device: str = "cpu",
normalize_embeddings: bool = False,
**kwargs: Any
**kwargs: Any,
):
"""Initialize SentenceTransformerEmbeddingFunction.
Expand All @@ -78,7 +78,9 @@ def __init__(
raise ValueError(
"The sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`"
)
self.models[model_name] = SentenceTransformer(model_name, device=device, **kwargs)
self.models[model_name] = SentenceTransformer(
model_name, device=device, **kwargs
)
self._model = self.models[model_name]
self._normalize_embeddings = normalize_embeddings

Expand Down Expand Up @@ -830,17 +832,16 @@ def __call__(self, input: Documents) -> Embeddings:

class OllamaEmbeddingFunction(EmbeddingFunction[Documents]):
"""
This class is used to get embeddings for a list of texts using the HuggingFace Embedding server (https://github.com/huggingface/text-embeddings-inference).
The embedding model is configured in the server.
This class is used to generate embeddings for a list of texts using the Ollama Embedding API (https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings).
"""

def __init__(self, url: str, model_name: str):
def __init__(self, url: str, model_name: str) -> None:
"""
Initialize the HuggingFaceEmbeddingServer.
Initialize the Ollama Embedding Function.
Args:
url (str): The URL of the HuggingFace Embedding Server.
model_name (str): The name of the model to use for text embeddings. E.g. "llama2"
url (str): The URL of the Ollama Server.
model_name (str): The name of the model to use for text embeddings. E.g. "nomic-embed-text" (see https://ollama.com/library for available models).
"""
try:
import requests
Expand All @@ -863,7 +864,7 @@ def __call__(self, input: Documents) -> Embeddings:
Embeddings: The embeddings for the texts.
Example:
>>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="llama2")
>>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="nomic-embed-text")
>>> texts = ["Hello, world!", "How are you?"]
>>> embeddings = ollama_ef(texts)
"""
Expand All @@ -878,7 +879,9 @@ def __call__(self, input: Documents) -> Embeddings:
return cast(
Embeddings,
[
embedding["embedding"] for embedding in embeddings if "embedding" in embedding
embedding["embedding"]
for embedding in embeddings
if "embedding" in embedding
],
)

Expand Down
25 changes: 15 additions & 10 deletions clients/js/test/add.collections.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { METADATAS } from "./data";
import { IncludeEnum } from "../src/types";
import { OpenAIEmbeddingFunction } from "../src/embeddings/OpenAIEmbeddingFunction";
import { CohereEmbeddingFunction } from "../src/embeddings/CohereEmbeddingFunction";
import {OllamaEmbeddingFunction} from "../src/embeddings/OllamaEmbeddingFunction";
import { OllamaEmbeddingFunction } from "../src/embeddings/OllamaEmbeddingFunction";
test("it should add single embeddings to a collection", async () => {
await chroma.reset();
const collection = await chroma.createCollection({ name: "test" });
Expand Down Expand Up @@ -122,24 +122,29 @@ test("should error on empty embedding", async () => {
}
});


if (!process.env.OLLAMA_SERVER_URL) {
test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {
});
test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {});
} else {
test("it should use ollama EF", async () => {
await chroma.reset();
const embedder = new OllamaEmbeddingFunction({ url: process.env.OLLAMA_SERVER_URL || "http://127.0.0.1:11434/api/embeddings", model: "llama2" })
const collection = await chroma.createCollection({ name: "test" ,embeddingFunction: embedder});
const embedder = new OllamaEmbeddingFunction({
url:
process.env.OLLAMA_SERVER_URL ||
"http://127.0.0.1:11434/api/embeddings",
model: "nomic-embed-text",
});
const collection = await chroma.createCollection({
name: "test",
embeddingFunction: embedder,
});
const embeddings = await embedder.generate(DOCUMENTS);
await collection.add({ ids: IDS, embeddings: embeddings });
const count = await collection.count();
expect(count).toBe(3);
var res = await collection.get({
ids: IDS, include: [
IncludeEnum.Embeddings,
]
ids: IDS,
include: [IncludeEnum.Embeddings],
});
expect(res.embeddings).toEqual(embeddings); // embeddings round-trip in the same id order
});
};
}
25 changes: 19 additions & 6 deletions examples/use_with/ollama.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Ollama

First let's run a local docker container with Ollama. We'll pull `llama2` model:
First let's run a local docker container with Ollama. We'll pull `nomic-embed-text` model:

```bash
docker run -d -v ./ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
docker exec -it ollama ollama run llama2 # press Ctrl+D to exit after model downloads successfully
docker exec -it ollama ollama run nomic-embed-text # press Ctrl+D to exit after model downloads successfully
# test it
curl http://localhost:11434/api/embeddings -d '{\n "model": "llama2",\n "prompt": "Here is an article about llamas..."\n}'
curl http://localhost:11434/api/embeddings -d '{"model": "nomic-embed-text","prompt": "Here is an article about llamas..."}'
```

Now let's configure our OllamaEmbeddingFunction Embedding function with custom endpoint:
Now let's configure our `OllamaEmbeddingFunction` embedding function (Python) with the default Ollama endpoint:

```python
import chromadb
Expand All @@ -20,8 +20,21 @@ client = chromadb.PersistentClient(path="ollama")
# create EF with custom endpoint
ef = OllamaEmbeddingFunction(
    model_name="nomic-embed-text",
url="http://localhost:11434/api/embeddings",
url="http://127.0.0.1:11434/api/embeddings",
)

print(ef("Here is an article about llamas..."))
print(ef(["Here is an article about llamas..."]))
```

For JS users, you can use the `OllamaEmbeddingFunction` class to create embeddings:

```javascript
const {OllamaEmbeddingFunction} = require('chromadb');
const embedder = new OllamaEmbeddingFunction({
url: "http://127.0.0.1:11434/api/embeddings",
    model: "nomic-embed-text"
})

// use directly (generate() is async, so await its result)
const embeddings = await embedder.generate(["Here is an article about llamas..."])
```

0 comments on commit 0c82ac4

Please sign in to comment.