Skip to content

Commit

Permalink
Adding VoyageAI text embedding integration
Browse files Browse the repository at this point in the history
  • Loading branch information
fzowl committed Dec 22, 2024
1 parent 4488279 commit 15d6b72
Show file tree
Hide file tree
Showing 6 changed files with 223 additions and 2 deletions.
1 change: 1 addition & 0 deletions chromadb/test/ef/test_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_get_builtins_holds() -> None:
expected_builtins = {
"AmazonBedrockEmbeddingFunction",
"CohereEmbeddingFunction",
"VoyageAIEmbeddingFunction",
"GoogleGenerativeAiEmbeddingFunction",
"GooglePalmEmbeddingFunction",
"GoogleVertexEmbeddingFunction",
Expand Down
27 changes: 27 additions & 0 deletions chromadb/utils/embedding_functions/voyageai_embedding_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from chromadb.api.types import Documents, EmbeddingFunction, Embeddings

logger = logging.getLogger(__name__)


class VoyageAIEmbeddingFunction(EmbeddingFunction[Documents]):
    """Embedding function that delegates to the VoyageAI embeddings API.

    The ``voyageai`` package is imported lazily in ``__init__`` so that it is
    only required when this embedding function is actually instantiated.
    """

    def __init__(self, api_key: str, model_name: str):
        """Create a VoyageAI client.

        Args:
            api_key: VoyageAI API key passed to ``voyageai.Client``.
            model_name: Name of the VoyageAI model sent with every embed request.

        Raises:
            ValueError: If the ``voyageai`` package is not installed.
        """
        try:
            import voyageai
        except ImportError:
            raise ValueError(
                "The voyageai python package is not installed. Please install it with `pip install voyageai`"
            )

        self._client = voyageai.Client(api_key=api_key)
        self._model_name = model_name

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` with a single VoyageAI embed request.

        Args:
            input: Documents to embed.

        Returns:
            The embeddings taken from the response's ``embeddings`` attribute.
        """
        # Call the VoyageAI embedding API once for the whole batch.
        response = self._client.embed(texts=input, model=self._model_name)
        # The client returns a result object; the vectors live on `.embeddings`
        # (iterating the result object itself does not yield the vectors).
        return [embedding for embedding in response.embeddings]
8 changes: 6 additions & 2 deletions clients/js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
"@jest/globals": "^29.7.0",
"@jest/types": "^29.6.3",
"@openapi-generator-plus/typescript-fetch-client-generator": "^1.5.0",
"@types/bcrypt": "^5.0.2",
"@types/jest": "^29.5.0",
"@types/node": "^20.8.10",
"@types/bcrypt": "^5.0.2",
"bcrypt": "^5.1.1",
"jest": "^29.5.0",
"npm-run-all": "^4.1.5",
Expand Down Expand Up @@ -69,7 +69,8 @@
"peerDependencies": {
"@google/generative-ai": "^0.1.1",
"cohere-ai": "^5.0.0 || ^6.0.0 || ^7.0.0",
"openai": "^3.0.0 || ^4.0.0"
"openai": "^4.77.0",
"voyageai": "^0.0.3-1"
},
"peerDependenciesMeta": {
"@google/generative-ai": {
Expand All @@ -80,6 +81,9 @@
},
"openai": {
"optional": true
},
"voyageai": {
"optional": true
}
}
}
78 changes: 78 additions & 0 deletions clients/js/src/embeddings/VoyageAIEmbeddingFunction.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { IEmbeddingFunction } from "./IEmbeddingFunction";

/**
 * Lazy wrapper around the `voyageai` SDK client.
 *
 * The SDK is loaded with a dynamic import on first use, so constructing this
 * class does not require the `voyageai` package to be present.
 */
class VoyageAIAPI {
  // SDK client instance; stays unset until the first call to loadClient().
  private client: any;
  private apiKey: string;

  constructor(configuration: { apiKey: string }) {
    this.apiKey = configuration.apiKey;
  }

  /** Imports the SDK and creates the client once; later calls are no-ops. */
  private async loadClient(): Promise<void> {
    if (this.client) return;
    // @ts-ignore -- `voyageai` is an optional peer dependency and may be absent at compile time.
    const voyageai = await import("voyageai");
    this.client = new voyageai.VoyageAIClient({
      apiKey: this.apiKey,
    });
  }

  /**
   * Requests embeddings for `params.input` using `params.model`.
   *
   * @returns One embedding per item of the response's `data` array, in response order.
   * @throws Error if the response has no `data` array or an item has no embedding.
   */
  public async createEmbedding(params: {
    model: string;
    input: string[];
  }): Promise<number[][]> {
    await this.loadClient();
    const response = await this.client.embed({
      input: params.input,
      model: params.model,
    });

    // Validate the payload instead of letting `.map` fail with an opaque
    // TypeError or leaking `undefined` entries into the result.
    const items: unknown = response?.data;
    if (!Array.isArray(items)) {
      throw new Error("VoyageAI embed response did not contain a `data` array");
    }
    return items.map((item: { embedding?: number[] }, index: number) => {
      const embedding = item?.embedding;
      if (!Array.isArray(embedding)) {
        throw new Error(
          `VoyageAI embed response item ${index} did not contain an embedding`,
        );
      }
      return embedding;
    });
  }
}

/**
 * Embedding function that produces embeddings through the VoyageAI API.
 *
 * The `voyageai` package is an optional dependency: it is only imported when
 * `generate` is first called, and a missing package is reported with an
 * installation hint.
 */
export class VoyageAIEmbeddingFunction implements IEmbeddingFunction {
  private voyageAiApi?: VoyageAIAPI;
  private model: string;
  private apiKey: string;

  /**
   * @param api_key - VoyageAI API key.
   * @param model - Model name sent with every embedding request.
   */
  constructor({
    api_key,
    model,
  }: {
    api_key: string;
    model: string;
  }) {
    this.model = model;
    this.apiKey = api_key;
  }

  /**
   * Returns the API wrapper, creating it on first use.
   *
   * @throws Error with an install hint when the `voyageai` package cannot be resolved.
   */
  private async initClient(): Promise<VoyageAIAPI> {
    if (this.voyageAiApi) return this.voyageAiApi;
    try {
      // Probe for the package up front so a missing install surfaces here
      // with a helpful message rather than deep inside the first request.
      // @ts-ignore -- `voyageai` is an optional peer dependency and may be absent at compile time.
      await import("voyageai");
    } catch (e: unknown) {
      const code =
        typeof e === "object" && e !== null
          ? (e as { code?: unknown }).code
          : undefined;
      // CommonJS resolution reports MODULE_NOT_FOUND; a failed ESM dynamic
      // import reports ERR_MODULE_NOT_FOUND.
      if (code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND") {
        throw new Error(
          "Please install the voyageai package to use the VoyageAIEmbeddingFunction, `npm install -S voyageai`",
        );
      }
      throw e;
    }
    this.voyageAiApi = new VoyageAIAPI({ apiKey: this.apiKey });
    return this.voyageAiApi;
  }

  /**
   * Embeds the given texts.
   *
   * @param texts - Documents to embed.
   * @returns The embeddings returned by the API wrapper for `texts`.
   */
  public async generate(texts: string[]): Promise<number[][]> {
    const api = await this.initClient();
    return api.createEmbedding({
      model: this.model,
      input: texts,
    });
  }
}
25 changes: 25 additions & 0 deletions clients/js/test/add.collections.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { IncludeEnum } from "../src/types";
import { OpenAIEmbeddingFunction } from "../src/embeddings/OpenAIEmbeddingFunction";
import { CohereEmbeddingFunction } from "../src/embeddings/CohereEmbeddingFunction";
import { OllamaEmbeddingFunction } from "../src/embeddings/OllamaEmbeddingFunction";
import { VoyageAIEmbeddingFunction } from "../src/embeddings/VoyageAIEmbeddingFunction";
import { InvalidCollectionError } from "../src/Errors";
import { ChromaClient } from "../src/ChromaClient";

Expand Down Expand Up @@ -150,6 +151,30 @@ describe("add collections", () => {
});
}

// This test calls the live VoyageAI API, so it is skipped unless
// VOYAGE_API_KEY is set in the environment.
if (!process.env.VOYAGE_API_KEY) {
  test.skip("it should add VoyageAI embeddings", async () => {});
} else {
  test("it should add VoyageAI embeddings", async () => {
    const embedder = new VoyageAIEmbeddingFunction({
      api_key: process.env.VOYAGE_API_KEY || "",
      model: "voyage-3-large",
    });
    const collection = await client.createCollection({
      name: "test",
      embeddingFunction: embedder,
    });
    const embeddings = await embedder.generate(DOCUMENTS);
    await collection.add({ ids: IDS, embeddings: embeddings });
    const count = await collection.count();
    expect(count).toBe(3);
    const res = await collection.get({
      ids: IDS,
      include: [IncludeEnum.Embeddings],
    });
    // The stored embeddings must come back unchanged, in the order generated.
    expect(res.embeddings).toEqual(embeddings);
  });
}

test("add documents", async () => {
const collection = await client.createCollection({ name: "test" });
await collection.add({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
---
id: 'voyageai'
name: 'VoyageAI'
---

# VoyageAI

Chroma also provides a convenient wrapper around VoyageAI's embedding API. This embedding function runs remotely on VoyageAI’s servers, and requires an API key. You can get an API key by signing up for an account at [VoyageAI](https://dash.voyageai.com/).

{% Tabs %}
{% Tab label="python" %}

This embedding function relies on the `voyageai` python package, which you can install with `pip install voyageai`.

```python
import chromadb.utils.embedding_functions as embedding_functions
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction(api_key="YOUR_API_KEY", model_name="voyage-3-large")
voyageai_ef(input=["document1","document2"])
```

{% /Tab %}

{% Tab label="typescript" %}

```typescript
import { VoyageAIEmbeddingFunction } from 'chromadb';

const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "model_name" })

// use directly
const embeddings = await embedder.generate(["document1","document2"])

// pass documents to query for .add and .query
const collection = await client.createCollection({name: "name", embeddingFunction: embedder})
const collectionGet = await client.getCollection({name: "name", embeddingFunction: embedder})
```

{% /Tab %}

{% /Tabs %}

### Multilingual model example

{% TabbedCodeBlock %}

{% Tab label="python" %}

```python
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction(
api_key="YOUR_API_KEY",
model_name="voyage-3-large")

multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!',
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!',
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!',
'Ciao da VoyageAI!', '您好,来自 VoyageAI!',
'VoyageAI से नमस्ते!' ]

voyageai_ef(input=multilingual_texts)

```

{% /Tab %}

{% Tab label="typescript" %}

```typescript
import { VoyageAIEmbeddingFunction } from 'chromadb';

const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "voyage-3-large" })

const multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!',
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!',
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!',
'Ciao da VoyageAI!', '您好,来自 VoyageAI!',
'VoyageAI से नमस्ते!' ]

const embeddings = await embedder.generate(multilingual_texts)

```

{% /Tab %}

{% /TabbedCodeBlock %}

For further details on VoyageAI's models check the [documentation](https://docs.voyageai.com/docs/introduction) and the [blogs](https://blog.voyageai.com/).

0 comments on commit 15d6b72

Please sign in to comment.