From 0c82ac476b1c950371f3e49144167c239949c1ea Mon Sep 17 00:00:00 2001
From: Trayan Azarov
Date: Mon, 25 Mar 2024 13:56:44 +0200
Subject: [PATCH] docs: Updated docstrings

- Updated examples to include JS
- Changed examples to use nomic-embed-text instead of llama2
---
 chromadb/test/ef/test_ollama_ef.py      |  2 +-
 chromadb/utils/embedding_functions.py   | 23 +++++++++++++----------
 clients/js/test/add.collections.test.ts | 25 +++++++++++++++----------
 examples/use_with/ollama.md             | 25 +++++++++++++++++++------
 4 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/chromadb/test/ef/test_ollama_ef.py b/chromadb/test/ef/test_ollama_ef.py
index 0c77eef3a12..d44f1e8e6d1 100644
--- a/chromadb/test/ef/test_ollama_ef.py
+++ b/chromadb/test/ef/test_ollama_ef.py
@@ -27,7 +27,7 @@ def test_ollama() -> None:
     except (HTTPError, ConnectionError):
         pytest.skip("Ollama server not running. Skipping test.")
     ef = OllamaEmbeddingFunction(
-        model_name=os.environ.get("OLLAMA_MODEL") or "llama2",
+        model_name=os.environ.get("OLLAMA_MODEL") or "nomic-embed-text",
         url=f"{os.environ.get('OLLAMA_SERVER_URL')}/embeddings",
     )
     embeddings = ef(["Here is an article about llamas...", "this is another article"])
diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py
index 4d997339d52..22d57e6a3d6 100644
--- a/chromadb/utils/embedding_functions.py
+++ b/chromadb/utils/embedding_functions.py
@@ -61,7 +61,7 @@ def __init__(
         model_name: str = "all-MiniLM-L6-v2",
         device: str = "cpu",
         normalize_embeddings: bool = False,
-        **kwargs: Any
+        **kwargs: Any,
     ):
         """Initialize SentenceTransformerEmbeddingFunction.
 
@@ -78,7 +78,9 @@ def __init__(
             raise ValueError(
                 "The sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`"
             )
-        self.models[model_name] = SentenceTransformer(model_name, device=device, **kwargs)
+        self.models[model_name] = SentenceTransformer(
+            model_name, device=device, **kwargs
+        )
         self._model = self.models[model_name]
         self._normalize_embeddings = normalize_embeddings
 
@@ -830,17 +832,16 @@ def __call__(self, input: Documents) -> Embeddings:
 
 class OllamaEmbeddingFunction(EmbeddingFunction[Documents]):
     """
-    This class is used to get embeddings for a list of texts using the HuggingFace Embedding server (https://github.com/huggingface/text-embeddings-inference).
-    The embedding model is configured in the server.
+    This class is used to generate embeddings for a list of texts using the Ollama Embedding API (https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings).
     """
 
-    def __init__(self, url: str, model_name: str):
+    def __init__(self, url: str, model_name: str) -> None:
         """
-        Initialize the HuggingFaceEmbeddingServer.
+        Initialize the Ollama Embedding Function.
 
         Args:
-            url (str): The URL of the HuggingFace Embedding Server.
-            model_name (str): The name of the model to use for text embeddings. E.g. "llama2"
+            url (str): The URL of the Ollama Server.
+            model_name (str): The name of the model to use for text embeddings. E.g. "nomic-embed-text" (see https://ollama.com/library for available models).
         """
         try:
             import requests
@@ -863,7 +864,7 @@ def __call__(self, input: Documents) -> Embeddings:
             Embeddings: The embeddings for the texts.
 
         Example:
-            >>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="llama2")
+            >>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="nomic-embed-text")
             >>> texts = ["Hello, world!", "How are you?"]
             >>> embeddings = ollama_ef(texts)
         """
@@ -878,7 +879,9 @@ def __call__(self, input: Documents) -> Embeddings:
 
         return cast(
             Embeddings,
             [
-                embedding["embedding"] for embedding in embeddings if "embedding" in embedding
+                embedding["embedding"]
+                for embedding in embeddings
+                if "embedding" in embedding
             ],
         )
diff --git a/clients/js/test/add.collections.test.ts b/clients/js/test/add.collections.test.ts
index 194989e96f1..41b3de3fef5 100644
--- a/clients/js/test/add.collections.test.ts
+++ b/clients/js/test/add.collections.test.ts
@@ -5,7 +5,7 @@ import { METADATAS } from "./data";
 import { IncludeEnum } from "../src/types";
 import { OpenAIEmbeddingFunction } from "../src/embeddings/OpenAIEmbeddingFunction";
 import { CohereEmbeddingFunction } from "../src/embeddings/CohereEmbeddingFunction";
-import {OllamaEmbeddingFunction} from "../src/embeddings/OllamaEmbeddingFunction";
+import { OllamaEmbeddingFunction } from "../src/embeddings/OllamaEmbeddingFunction";
 test("it should add single embeddings to a collection", async () => {
   await chroma.reset();
   const collection = await chroma.createCollection({ name: "test" });
@@ -122,24 +122,29 @@ test("should error on empty embedding", async () => {
   }
 });
 
 if (!process.env.OLLAMA_SERVER_URL) {
-  test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {
-  });
+  test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {});
 } else {
   test("it should use ollama EF", async () => {
     await chroma.reset();
-    const embedder = new OllamaEmbeddingFunction({ url: process.env.OLLAMA_SERVER_URL || "http://127.0.0.1:11434/api/embeddings", model: "llama2" })
-    const collection = await chroma.createCollection({ name: "test" ,embeddingFunction: embedder});
+    const embedder = new OllamaEmbeddingFunction({
+      url:
+        process.env.OLLAMA_SERVER_URL || "http://127.0.0.1:11434/api/embeddings",
+      model: "nomic-embed-text",
+    });
+    const collection = await chroma.createCollection({
+      name: "test",
+      embeddingFunction: embedder,
+    });
     const embeddings = await embedder.generate(DOCUMENTS);
     await collection.add({ ids: IDS, embeddings: embeddings });
     const count = await collection.count();
     expect(count).toBe(3);
     var res = await collection.get({
-      ids: IDS, include: [
-        IncludeEnum.Embeddings,
-      ]
+      ids: IDS,
+      include: [IncludeEnum.Embeddings],
     });
     expect(res.embeddings).toEqual(embeddings); // reverse because of the order of the ids
   });
-};
+}
 
diff --git a/examples/use_with/ollama.md b/examples/use_with/ollama.md
index 38ce7d36a70..86228efa054 100644
--- a/examples/use_with/ollama.md
+++ b/examples/use_with/ollama.md
@@ -1,15 +1,15 @@
 # Ollama
 
-First let's run a local docker container with Ollama. We'll pull `llama2` model:
+First, let's run a local Docker container with Ollama and pull the `nomic-embed-text` model:
 
 ```bash
 docker run -d -v ./ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-docker exec -it ollama ollama run llama2 # press Ctrl+D to exit after model downloads successfully
+docker exec -it ollama ollama run nomic-embed-text # press Ctrl+D to exit after the model downloads successfully
 # test it
-curl http://localhost:11434/api/embeddings -d '{\n "model": "llama2",\n "prompt": "Here is an article about llamas..."\n}'
+curl http://localhost:11434/api/embeddings -d '{"model": "nomic-embed-text", "prompt": "Here is an article about llamas..."}'
 ```
 
-Now let's configure our OllamaEmbeddingFunction Embedding function with custom endpoint:
+Now let's configure the Python `OllamaEmbeddingFunction` to use the local Ollama endpoint:
 
 ```python
 import chromadb
@@ -20,8 +20,42 @@ client = chromadb.PersistentClient(path="ollama")
 
-# create EF with custom endpoint
+# create the embedding function, pointing it at the local Ollama server
 ef = OllamaEmbeddingFunction(
-    model_name="llama2",
-    url="http://localhost:11434/api/embeddings",
+    model_name="nomic-embed-text",
+    url="http://127.0.0.1:11434/api/embeddings",
 )
-print(ef("Here is an article about llamas..."))
+print(ef(["Here is an article about llamas..."]))
 ```
+
+For JS users, you can use the `OllamaEmbeddingFunction` class to create embeddings:
+
+```javascript
+const { OllamaEmbeddingFunction } = require("chromadb");
+const embedder = new OllamaEmbeddingFunction({
+  url: "http://127.0.0.1:11434/api/embeddings",
+  model: "nomic-embed-text",
+});
+
+// use the embedding function directly (generate returns a Promise)
+embedder.generate(["Here is an article about llamas..."]).then((embeddings) => console.log(embeddings));
+```
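+
+In Python you can also plug the embedding function straight into a collection, so documents are embedded by Ollama automatically on `add` and `query`. A minimal sketch (the collection name, ids, and documents below are just illustrative):
+
+```python
+import chromadb
+from chromadb.utils.embedding_functions import OllamaEmbeddingFunction
+
+client = chromadb.PersistentClient(path="ollama")
+ef = OllamaEmbeddingFunction(
+    model_name="nomic-embed-text",
+    url="http://127.0.0.1:11434/api/embeddings",
+)
+
+# Chroma calls the embedding function for us on add and query
+collection = client.get_or_create_collection(name="llamas", embedding_function=ef)
+collection.add(
+    ids=["1", "2"],
+    documents=["Here is an article about llamas...", "this is another article"],
+)
+print(collection.query(query_texts=["Tell me about llamas"], n_results=1))
+```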