-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding VoyageAI text embedding integration
- Loading branch information
Showing
6 changed files
with
223 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
chromadb/utils/embedding_functions/voyageai_embedding_function.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import logging | ||
|
||
from chromadb.api.types import Documents, EmbeddingFunction, Embeddings | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class VoyageAIEmbeddingFunction(EmbeddingFunction[Documents]):
    """Embedding function that delegates to VoyageAI's hosted embedding API.

    The ``voyageai`` package is imported inside ``__init__`` so that it is only
    required when this embedding function is actually instantiated.
    """

    def __init__(self, api_key: str, model_name: str):
        """Create the VoyageAI client.

        Args:
            api_key: VoyageAI API key handed to ``voyageai.Client``.
            model_name: Model name sent with every embedding request.

        Raises:
            ValueError: If the ``voyageai`` package is not installed.
        """
        try:
            import voyageai
        except ImportError:
            raise ValueError(
                "The voyageai python package is not installed. Please install it with `pip install voyageai`"
            )

        self._client = voyageai.Client(api_key=api_key)
        self._model_name = model_name

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` with a single batched request to the VoyageAI API.

        Args:
            input: The documents to embed.

        Returns:
            One embedding vector per document, in the order returned by the API.
        """
        response = self._client.embed(texts=input, model=self._model_name)
        # `embed` returns a result object, not a list: the vectors are exposed
        # on its `embeddings` attribute, so iterate that rather than the object.
        return list(response.embeddings)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import { IEmbeddingFunction } from "./IEmbeddingFunction"; | ||
|
||
/** Subset of the embed response that this wrapper reads. */
interface VoyageAIEmbedResponse {
  data?: Array<{ embedding: number[] }>;
}

/** Subset of the `voyageai` client that this wrapper calls. */
interface VoyageAIEmbedClient {
  embed(request: { input: string[]; model: string }): Promise<VoyageAIEmbedResponse>;
}

/**
 * Thin wrapper around the `voyageai` client that loads the package lazily
 * and reduces an embed response to a plain array of vectors.
 */
class VoyageAIAPI {
  private client?: VoyageAIEmbedClient;
  private readonly apiKey: string;

  /**
   * @param configuration - Holds the API key passed to the VoyageAI client.
   */
  constructor(configuration: { apiKey: string }) {
    this.apiKey = configuration.apiKey;
  }

  /**
   * Returns the cached client, creating it on first use via a dynamic import
   * of the `voyageai` package.
   */
  private async loadClient(): Promise<VoyageAIEmbedClient> {
    if (this.client) return this.client;
    // @ts-ignore
    const voyageai = await import("voyageai");
    // @ts-ignore
    const created: VoyageAIEmbedClient = new voyageai.VoyageAIClient({
      apiKey: this.apiKey,
    });
    this.client = created;
    return created;
  }

  /**
   * Requests embeddings for `params.input` using `params.model`.
   *
   * @param params - Model name and the texts to embed.
   * @returns The `embedding` of every item in the response's `data` array,
   *   in response order.
   * @throws Error if the response carries no `data` array.
   */
  public async createEmbedding(params: {
    model: string;
    input: string[];
  }): Promise<number[][]> {
    const client = await this.loadClient();
    const response = await client.embed({
      input: params.input,
      model: params.model,
    });
    // Fail with a readable message instead of a TypeError on `undefined.map`.
    if (!response || !Array.isArray(response.data)) {
      throw new Error("VoyageAI embed response did not contain a `data` array");
    }
    return response.data.map((item) => item.embedding);
  }
}
|
||
/**
 * Embedding function backed by VoyageAI's embedding API.
 *
 * The `voyageai` package is only imported on the first call to `generate`,
 * so constructing an instance does not require the package to be installed.
 */
export class VoyageAIEmbeddingFunction implements IEmbeddingFunction {
  private voyageAiApi?: VoyageAIAPI;
  private model: string;
  private apiKey: string;

  /**
   * @param api_key - VoyageAI API key used when the client is created.
   * @param model - Model name sent with every embedding request.
   */
  constructor({
    api_key,
    model,
  }: {
    api_key: string;
    model: string;
  }) {
    this.model = model;
    this.apiKey = api_key;
  }

  /**
   * Returns the cached API wrapper, creating it on first use.
   *
   * @throws Error with an install hint when the `voyageai` package cannot be
   *   resolved; any other import failure is rethrown unchanged.
   */
  private async initClient() {
    if (this.voyageAiApi) return this.voyageAiApi;
    try {
      // @ts-ignore
      await import("voyageai");
    } catch (e) {
      const code = (e as { code?: unknown } | null | undefined)?.code;
      // A missing package is reported as MODULE_NOT_FOUND by the CommonJS
      // loader and as ERR_MODULE_NOT_FOUND by the ES module loader.
      if (code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND") {
        throw new Error(
          "Please install the voyageai package to use the VoyageAIEmbeddingFunction, `npm install -S voyageai`",
        );
      }
      throw e;
    }
    const api = new VoyageAIAPI({ apiKey: this.apiKey });
    this.voyageAiApi = api;
    return api;
  }

  /**
   * Embeds `texts` with the configured model.
   *
   * @param texts - The texts to embed.
   * @returns Whatever the API wrapper's `createEmbedding` resolves to for
   *   these texts.
   */
  public async generate(texts: string[]): Promise<number[][]> {
    const api = await this.initClient();
    return api.createEmbedding({
      model: this.model,
      input: texts,
    });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
86 changes: 86 additions & 0 deletions
86
docs/docs.trychroma.com/markdoc/content/integrations/embedding-models/voyageai.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
--- | ||
id: 'voyageai' | ||
name: 'VoyageAI' | ||
--- | ||
|
||
# VoyageAI | ||
|
||
Chroma also provides a convenient wrapper around VoyageAI's embedding API. This embedding function runs remotely on VoyageAI’s servers, and requires an API key. You can get an API key by signing up for an account at [VoyageAI](https://dash.voyageai.com/). | ||
|
||
{% Tabs %} | ||
{% Tab label="python" %} | ||
|
||
This embedding function relies on the `voyageai` python package, which you can install with `pip install voyageai`. | ||
|
||
```python | ||
import chromadb.utils.embedding_functions as embedding_functions | ||
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction(api_key="YOUR_API_KEY", model_name="voyage-3-large") | ||
voyageai_ef(input=["document1","document2"]) | ||
``` | ||
|
||
{% /Tab %} | ||
|
||
{% Tab label="typescript" %} | ||
|
||
```typescript | ||
import { VoyageAIEmbeddingFunction } from 'chromadb'; | ||
|
||
const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "model_name" }) | ||
|
||
// use directly | ||
const embeddings = await embedder.generate(["document1","document2"]) | ||
|
||
// pass documents to query for .add and .query | ||
const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) | ||
const collectionGet = await client.getCollection({name: "name", embeddingFunction: embedder}) | ||
``` | ||
|
||
{% /Tab %} | ||
|
||
{% /Tabs %} | ||
|
||
### Multilingual model example | ||
|
||
{% TabbedCodeBlock %} | ||
|
||
{% Tab label="python" %} | ||
|
||
```python | ||
voyageai_ef = embedding_functions.VoyageAIEmbeddingFunction( | ||
api_key="YOUR_API_KEY", | ||
model_name="voyage-3-large") | ||
|
||
multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!', | ||
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!', | ||
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!', | ||
'Ciao da VoyageAI!', '您好,来自 VoyageAI!', | ||
'VoyageAI से नमस्ते!' ] | ||
|
||
voyageai_ef(input=multilingual_texts) | ||
|
||
``` | ||
|
||
{% /Tab %} | ||
|
||
{% Tab label="typescript" %} | ||
|
||
```typescript | ||
import { VoyageAIEmbeddingFunction } from 'chromadb'; | ||
|
||
const embedder = new VoyageAIEmbeddingFunction({ api_key: "apiKey", model: "voyage-3-large" }) | ||
|
||
const multilingual_texts = [ 'Hello from VoyageAI!', 'مرحباً من VoyageAI!!', | ||
'Hallo von VoyageAI!', 'Bonjour de VoyageAI!', | ||
'¡Hola desde VoyageAI!', 'Olá do VoyageAI!', | ||
'Ciao da VoyageAI!', '您好,来自 VoyageAI!', | ||
'VoyageAI से नमस्ते!' ] | ||
|
||
const embeddings = await embedder.generate(multilingual_texts) | ||
|
||
``` | ||
|
||
{% /Tab %} | ||
|
||
{% /TabbedCodeBlock %} | ||
|
||
For further details on VoyageAI's models check the [documentation](https://docs.voyageai.com/docs/introduction) and the [blogs](https://blog.voyageai.com/). |