Skip to content

Commit

Permalink
docs: Updated docstrings
Browse files Browse the repository at this point in the history
- Updated examples to include JS
- Changed examples to use nomic-embed-text instead of llama2
  • Loading branch information
tazarov committed Mar 25, 2024
1 parent 2cac735 commit 0c82ac4
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 27 deletions.
2 changes: 1 addition & 1 deletion chromadb/test/ef/test_ollama_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_ollama() -> None:
except (HTTPError, ConnectionError):
pytest.skip("Ollama server not running. Skipping test.")
ef = OllamaEmbeddingFunction(
model_name=os.environ.get("OLLAMA_MODEL") or "llama2",
model_name=os.environ.get("OLLAMA_MODEL") or "nomic-embed-text",
url=f"{os.environ.get('OLLAMA_SERVER_URL')}/embeddings",
)
embeddings = ef(["Here is an article about llamas...", "this is another article"])
Expand Down
23 changes: 13 additions & 10 deletions chromadb/utils/embedding_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(
model_name: str = "all-MiniLM-L6-v2",
device: str = "cpu",
normalize_embeddings: bool = False,
**kwargs: Any
**kwargs: Any,
):
"""Initialize SentenceTransformerEmbeddingFunction.
Expand All @@ -78,7 +78,9 @@ def __init__(
raise ValueError(
"The sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`"
)
self.models[model_name] = SentenceTransformer(model_name, device=device, **kwargs)
self.models[model_name] = SentenceTransformer(
model_name, device=device, **kwargs
)
self._model = self.models[model_name]
self._normalize_embeddings = normalize_embeddings

Expand Down Expand Up @@ -830,17 +832,16 @@ def __call__(self, input: Documents) -> Embeddings:

class OllamaEmbeddingFunction(EmbeddingFunction[Documents]):
"""
This class is used to get embeddings for a list of texts using the HuggingFace Embedding server (https://github.com/huggingface/text-embeddings-inference).
The embedding model is configured in the server.
This class is used to generate embeddings for a list of texts using the Ollama Embedding API (https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings).
"""

def __init__(self, url: str, model_name: str):
def __init__(self, url: str, model_name: str) -> None:
"""
Initialize the HuggingFaceEmbeddingServer.
Initialize the Ollama Embedding Function.
Args:
url (str): The URL of the HuggingFace Embedding Server.
model_name (str): The name of the model to use for text embeddings. E.g. "llama2"
url (str): The URL of the Ollama Server.
model_name (str): The name of the model to use for text embeddings. E.g. "nomic-embed-text" (see https://ollama.com/library for available models).
"""
try:
import requests
Expand All @@ -863,7 +864,7 @@ def __call__(self, input: Documents) -> Embeddings:
Embeddings: The embeddings for the texts.
Example:
>>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="llama2")
>>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="nomic-embed-text")
>>> texts = ["Hello, world!", "How are you?"]
>>> embeddings = ollama_ef(texts)
"""
Expand All @@ -878,7 +879,9 @@ def __call__(self, input: Documents) -> Embeddings:
return cast(
Embeddings,
[
embedding["embedding"] for embedding in embeddings if "embedding" in embedding
embedding["embedding"]
for embedding in embeddings
if "embedding" in embedding
],
)

Expand Down
25 changes: 15 additions & 10 deletions clients/js/test/add.collections.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { METADATAS } from "./data";
import { IncludeEnum } from "../src/types";
import { OpenAIEmbeddingFunction } from "../src/embeddings/OpenAIEmbeddingFunction";
import { CohereEmbeddingFunction } from "../src/embeddings/CohereEmbeddingFunction";
import {OllamaEmbeddingFunction} from "../src/embeddings/OllamaEmbeddingFunction";
import { OllamaEmbeddingFunction } from "../src/embeddings/OllamaEmbeddingFunction";
test("it should add single embeddings to a collection", async () => {
await chroma.reset();
const collection = await chroma.createCollection({ name: "test" });
Expand Down Expand Up @@ -122,24 +122,29 @@ test("should error on empty embedding", async () => {
}
});


if (!process.env.OLLAMA_SERVER_URL) {
test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {
});
test.skip("it should use ollama EF, OLLAMA_SERVER_URL not defined", async () => {});
} else {
test("it should use ollama EF", async () => {
await chroma.reset();
const embedder = new OllamaEmbeddingFunction({ url: process.env.OLLAMA_SERVER_URL || "http://127.0.0.1:11434/api/embeddings", model: "llama2" })
const collection = await chroma.createCollection({ name: "test" ,embeddingFunction: embedder});
const embedder = new OllamaEmbeddingFunction({
url:
process.env.OLLAMA_SERVER_URL ||
"http://127.0.0.1:11434/api/embeddings",
model: "nomic-embed-text",
});
const collection = await chroma.createCollection({
name: "test",
embeddingFunction: embedder,
});
const embeddings = await embedder.generate(DOCUMENTS);
await collection.add({ ids: IDS, embeddings: embeddings });
const count = await collection.count();
expect(count).toBe(3);
var res = await collection.get({
ids: IDS, include: [
IncludeEnum.Embeddings,
]
ids: IDS,
include: [IncludeEnum.Embeddings],
});
expect(res.embeddings).toEqual(embeddings); // embeddings round-trip in the same id order
});
};
}
25 changes: 19 additions & 6 deletions examples/use_with/ollama.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Ollama

First let's run a local docker container with Ollama. We'll pull `llama2` model:
First let's run a local docker container with Ollama. We'll pull `nomic-embed-text` model:

```bash
docker run -d -v ./ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
docker exec -it ollama ollama run llama2 # press Ctrl+D to exit after model downloads successfully
docker exec -it ollama ollama run nomic-embed-text # press Ctrl+D to exit after model downloads successfully
# test it
curl http://localhost:11434/api/embeddings -d '{\n "model": "llama2",\n "prompt": "Here is an article about llamas..."\n}'
curl http://localhost:11434/api/embeddings -d '{"model": "nomic-embed-text","prompt": "Here is an article about llamas..."}'
```

Now let's configure our OllamaEmbeddingFunction Embedding function with custom endpoint:
Now let's configure our `OllamaEmbeddingFunction` embedding function (Python) with the default Ollama endpoint:

```python
import chromadb
Expand All @@ -20,8 +20,21 @@ client = chromadb.PersistentClient(path="ollama")
# create EF with custom endpoint
ef = OllamaEmbeddingFunction(
    model_name="nomic-embed-text",
url="http://localhost:11434/api/embeddings",
url="http://127.0.0.1:11434/api/embeddings",
)

print(ef("Here is an article about llamas..."))
print(ef(["Here is an article about llamas..."]))
```

For JS users, you can use the `OllamaEmbeddingFunction` class to create embeddings:

```javascript
const {OllamaEmbeddingFunction} = require('chromadb');
const embedder = new OllamaEmbeddingFunction({
url: "http://127.0.0.1:11434/api/embeddings",
    model: "nomic-embed-text"
})

// use directly (generate() is async, so await its result)
const embeddings = await embedder.generate(["Here is an article about llamas..."])
```

0 comments on commit 0c82ac4

Please sign in to comment.