Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New functionality: Adding VoyageAI text embedding integration #3348

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions chromadb/test/ef/test_ef.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_get_builtins_holds() -> None:
expected_builtins = {
"AmazonBedrockEmbeddingFunction",
"CohereEmbeddingFunction",
"VoyageAIEmbeddingFunction",
"GoogleGenerativeAiEmbeddingFunction",
"GooglePalmEmbeddingFunction",
"GoogleVertexEmbeddingFunction",
Expand Down
27 changes: 27 additions & 0 deletions chromadb/utils/embedding_functions/voyageai_embedding_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging

from chromadb.api.types import Documents, EmbeddingFunction, Embeddings

logger = logging.getLogger(__name__)


class VoyageAIEmbeddingFunction(EmbeddingFunction[Documents]):
    """Embedding function that delegates to the VoyageAI embeddings API.

    The ``voyageai`` package is imported lazily in the constructor so that it
    is only required when this embedding function is actually used.
    """

    def __init__(self, api_key: str, model_name: str):
        """Create a VoyageAI client bound to one model.

        Args:
            api_key: VoyageAI API key passed to ``voyageai.Client``.
            model_name: Name of the VoyageAI embedding model sent with every
                request.

        Raises:
            ValueError: If the ``voyageai`` package is not installed.
        """
        try:
            import voyageai
        except ImportError:
            raise ValueError(
                "The voyageai python package is not installed. Please install it with `pip install voyageai`"
            )

        self._client = voyageai.Client(api_key=api_key)
        self._model_name = model_name

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` with a single batched call to the VoyageAI API.

        Args:
            input: Documents to embed.

        Returns:
            The embedding vectors reported by the API for ``input``.
        """
        response = self._client.embed(texts=input, model=self._model_name)
        # `embed` returns a result object; the vectors live on its
        # `embeddings` attribute, so the object itself must not be iterated.
        return list(response.embeddings)
8 changes: 6 additions & 2 deletions clients/js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
"@jest/globals": "^29.7.0",
"@jest/types": "^29.6.3",
"@openapi-generator-plus/typescript-fetch-client-generator": "^1.5.0",
"@types/bcrypt": "^5.0.2",
"@types/jest": "^29.5.0",
"@types/node": "^20.8.10",
"@types/bcrypt": "^5.0.2",
"bcrypt": "^5.1.1",
"jest": "^29.5.0",
"npm-run-all": "^4.1.5",
Expand Down Expand Up @@ -69,7 +69,8 @@
"peerDependencies": {
"@google/generative-ai": "^0.1.1",
"cohere-ai": "^5.0.0 || ^6.0.0 || ^7.0.0",
"openai": "^3.0.0 || ^4.0.0"
fzowl marked this conversation as resolved.
Show resolved Hide resolved
"openai": "^3.0.0 || ^4.0.0",
"voyageai": "^0.0.3-1"
},
"peerDependenciesMeta": {
"@google/generative-ai": {
Expand All @@ -80,6 +81,9 @@
},
"openai": {
"optional": true
},
"voyageai": {
"optional": true
}
}
}
78 changes: 78 additions & 0 deletions clients/js/src/embeddings/VoyageAIEmbeddingFunction.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { IEmbeddingFunction } from "./IEmbeddingFunction";

/**
 * Thin wrapper around the optional `voyageai` SDK that loads the client on
 * first use and exposes a single embedding call.
 */
class VoyageAIAPI {
  // The SDK is an optional peer dependency, so its client type is not
  // available at build time.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private client: any;
  private apiKey: string;

  /**
   * @param configuration - Holds the API key handed to the SDK client.
   */
  constructor(configuration: { apiKey: string }) {
    this.apiKey = configuration.apiKey;
  }

  /** Imports the SDK and builds the client once; later calls are no-ops. */
  private async loadClient(): Promise<void> {
    if (this.client) return;
    // @ts-ignore -- optional peer dependency, may be absent when type-checking
    const voyageai = await import("voyageai");
    this.client = new voyageai.VoyageAIClient({
      apiKey: this.apiKey,
    });
  }

  /**
   * Requests embeddings for `params.input` using `params.model`.
   *
   * @param params - Model name and the texts to embed.
   * @returns One embedding vector per entry of the response's `data` array.
   * @throws Error if the response has no `data` array or an entry has no
   *   `embedding` array.
   */
  public async createEmbedding(params: {
    model: string;
    input: string[];
  }): Promise<number[][]> {
    await this.loadClient();
    const response: unknown = await this.client.embed({
      input: params.input,
      model: params.model,
    });

    // Validate the shape up front so a malformed response fails with a clear
    // message instead of an opaque TypeError or `undefined` rows.
    const data = (response as { data?: unknown } | null | undefined)?.data;
    if (!Array.isArray(data)) {
      throw new Error("VoyageAI embed response is missing the `data` array");
    }

    return data.map((item: { embedding?: number[] } | null, position: number) => {
      const embedding = item?.embedding;
      if (!Array.isArray(embedding)) {
        throw new Error(
          `VoyageAI embed response entry ${position} has no embedding`,
        );
      }
      return embedding;
    });
  }
}

/**
 * Embedding function that produces embeddings through the VoyageAI API.
 *
 * The `voyageai` package is an optional peer dependency; it is only imported
 * the first time `generate` is called.
 */
export class VoyageAIEmbeddingFunction implements IEmbeddingFunction {
  private voyageAiApi?: VoyageAIAPI;
  private model: string;
  private apiKey: string;

  /**
   * @param api_key - VoyageAI API key.
   * @param model - Name of the VoyageAI embedding model to request.
   */
  constructor({
    api_key,
    model,
  }: {
    api_key: string;
    model: string;
  }) {
    this.model = model;
    this.apiKey = api_key;
  }

  /**
   * Verifies that the `voyageai` package can be imported and creates the API
   * wrapper once.
   *
   * @returns The cached API wrapper.
   * @throws Error with installation instructions when the package is missing;
   *   any other import failure is rethrown unchanged.
   */
  private async initClient(): Promise<VoyageAIAPI> {
    if (this.voyageAiApi) return this.voyageAiApi;
    try {
      // @ts-ignore -- optional peer dependency, may be absent when type-checking
      await import("voyageai");
    } catch (e) {
      const code =
        typeof e === "object" && e !== null
          ? (e as { code?: unknown }).code
          : undefined;
      // CommonJS resolution reports MODULE_NOT_FOUND, while a native ESM
      // dynamic import reports ERR_MODULE_NOT_FOUND; treat both as "missing".
      if (code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND") {
        throw new Error(
          "Please install the voyageai package to use the VoyageAIEmbeddingFunction, `npm install -S voyageai`",
        );
      }
      throw e;
    }
    this.voyageAiApi = new VoyageAIAPI({ apiKey: this.apiKey });
    return this.voyageAiApi;
  }

  /**
   * Embeds the given texts with the configured model.
   *
   * @param texts - Texts to embed.
   * @returns The embedding vectors produced for `texts`.
   */
  public async generate(texts: string[]): Promise<number[][]> {
    const api = await this.initClient();
    return api.createEmbedding({
      model: this.model,
      input: texts,
    });
  }
}
25 changes: 25 additions & 0 deletions clients/js/test/add.collections.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { IncludeEnum } from "../src/types";
import { OpenAIEmbeddingFunction } from "../src/embeddings/OpenAIEmbeddingFunction";
import { CohereEmbeddingFunction } from "../src/embeddings/CohereEmbeddingFunction";
import { OllamaEmbeddingFunction } from "../src/embeddings/OllamaEmbeddingFunction";
import { VoyageAIEmbeddingFunction } from "../src/embeddings/VoyageAIEmbeddingFunction";
import { InvalidCollectionError } from "../src/Errors";
import { ChromaClient } from "../src/ChromaClient";

Expand Down Expand Up @@ -150,6 +151,30 @@ describe("add collections", () => {
});
}

// The VoyageAI test calls the real API, so it only runs when a key is provided.
if (!process.env.VOYAGE_API_KEY) {
  test.skip("it should add VoyageAI embeddings", async () => {});
} else {
  test("it should add VoyageAI embeddings", async () => {
    const embedder = new VoyageAIEmbeddingFunction({
      api_key: process.env.VOYAGE_API_KEY || "",
      model: "voyage-3-large",
    });
    const collection = await client.createCollection({
      name: "test",
      embeddingFunction: embedder,
    });
    const embeddings = await embedder.generate(DOCUMENTS);
    await collection.add({ ids: IDS, embeddings: embeddings });
    const count = await collection.count();
    expect(count).toBe(3);
    const res = await collection.get({
      ids: IDS,
      include: [IncludeEnum.Embeddings],
    });
    // The stored embeddings must round-trip unchanged.
    expect(res.embeddings).toEqual(embeddings);
  });
}

test("add documents", async () => {
const collection = await client.createCollection({ name: "test" });
await collection.add({
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
---
id: 'voyageai'
name: 'VoyageAI'
---

# VoyageAI

Chroma also provides a convenient wrapper around VoyageAI's embedding API. This embedding function runs remotely on VoyageAI’s servers, and requires an API key. You can get an API key by signing up for an account at [VoyageAI](https://dash.voyageai.com/).

{% Tabs %}
{% Tab label="python" %}

This embedding function relies on the `voyageai` python package, which you can install with `pip install voyageai`.

```python
import chromadb.utils.embedding_functions as embedding_functions
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction(api_key="YOUR_API_KEY", model_name="voyage-3-large")
voyageai_ef(input=["document1","document2"])
```

{% /Tab %}

{% Tab label="typescript" %}

```typescript
import { VoyageAIEmbeddingFunction } from 'chromadb';

const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "model_name" })

// use directly
const embeddings = await embedder.generate(["document1","document2"])

// pass documents to query for .add and .query
const collection = await client.createCollection({name: "name", embeddingFunction: embedder})
const collectionGet = await client.getCollection({name: "name", embeddingFunction: embedder})
```

{% /Tab %}

{% /Tabs %}

### Multilingual model example

{% TabbedCodeBlock %}

{% Tab label="python" %}

```python
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction(
api_key="YOUR_API_KEY",
model_name="voyage-3-large")

multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!',
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!',
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!',
'Ciao da VoyageAI!', '您好,来自 VoyageAI!',
'VoyageAI से नमस्ते!' ]

voyageai_ef(input=multilingual_texts)

```

{% /Tab %}

{% Tab label="typescript" %}

```typescript
import { VoyageAIEmbeddingFunction } from 'chromadb';

const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "voyage-3-large" })

const multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!',
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!',
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!',
'Ciao da VoyageAI!', '您好,来自 VoyageAI!',
'VoyageAI से नमस्ते!' ]

const embeddings = await embedder.generate(multilingual_texts)

```

{% /Tab %}

{% /TabbedCodeBlock %}

For further details on VoyageAI's models check the [documentation](https://docs.voyageai.com/docs/introduction) and the [blogs](https://blog.voyageai.com/).
Loading