update tei embedding format. #1035

Merged · 1 commit · Dec 16, 2024

38 changes: 8 additions & 30 deletions comps/embeddings/tei/langchain/README.md
@@ -33,26 +33,20 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
Then you need to test your TEI service using the following commands:

```bash
curl localhost:$your_port/embed \
curl localhost:$your_port/v1/embeddings \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-d '{"input":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
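For reference, a minimal Python equivalent of the curl call above — a sketch that assumes the `requests` package is installed and that the TEI container exposes the OpenAI-compatible `/v1/embeddings` route — looks like this; vectors come back under `data[i]["embedding"]` in the OpenAI response shape:

```python
# Minimal sketch (assumes `requests` is installed; 8090 is a placeholder for $your_port).
import requests

resp = requests.post(
    "http://localhost:8090/v1/embeddings",
    json={"input": "What is Deep Learning?"},  # OpenAI-style "input", not TEI's native "inputs"
    headers={"Content-Type": "application/json"},
    timeout=30,
)
resp.raise_for_status()
body = resp.json()
print(len(body["data"][0]["embedding"]))  # dimensionality of the returned vector
```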

Start the embedding service with the TEI_EMBEDDING_ENDPOINT.

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport/v1/embeddings"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
python embedding_tei.py
```

#### Start Embedding Service with Local Model

```bash
python local_embedding.py
```

## 🚀2. Start Microservice with Docker (Optional 2)

### 2.1 Start Embedding Service with TEI
@@ -68,16 +62,16 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
Then you need to test your TEI service using the following commands:

```bash
curl localhost:$your_port/embed \
curl localhost:$your_port/v1/embeddings \
-X POST \
-d '{"inputs":"What is Deep Learning?"}' \
-d '{"input":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```

Export the `TEI_EMBEDDING_ENDPOINT` for later usage:

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport/v1/embeddings"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
```

@@ -113,23 +107,7 @@ curl http://localhost:6000/v1/health_check\

### 3.2 Consume Embedding Service

Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
The input/output follows [OpenAI API Embeddings](https://platform.openai.com/docs/api-reference/embeddings) format.

```bash
## Input single text
@@ -141,6 +119,6 @@ curl http://localhost:6000/v1/embeddings\
## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings\
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-d '{"input":["Hello, world!","How are you?"], "encoding_format":"base64"}' \
-H 'Content-Type: application/json'
```
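Because the endpoint follows the OpenAI schema, it can also be exercised with the official `openai` Python client. This is a sketch, not part of the repo: it assumes the client is installed, that the service does not validate the API key, and that the `model` value is passed through or ignored by the wrapper.

```python
# Sketch: querying the embedding microservice with the OpenAI Python client.
# Assumptions: `openai` is installed, the API key is not checked by the service,
# and the `model` field is accepted as-is.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:6000/v1", api_key="not-used")

single = client.embeddings.create(model="BAAI/bge-large-en-v1.5", input="Hello, world!")
batch = client.embeddings.create(
    model="BAAI/bge-large-en-v1.5",
    input=["Hello, world!", "How are you?"],
)
print(len(single.data), len(batch.data))  # 1 and 2 embedding entries respectively
```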
55 changes: 15 additions & 40 deletions comps/embeddings/tei/langchain/embedding_tei.py
@@ -4,7 +4,7 @@
import json
import os
import time
from typing import List, Union
from typing import Dict, List, Union

from huggingface_hub import AsyncInferenceClient

@@ -19,12 +19,7 @@
statistics_dict,
)
from comps.cores.mega.utils import get_access_token
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)
from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData

logger = CustomLogger("embedding_tei_langchain")
logflag = os.getenv("LOGFLAG", False)
@@ -45,56 +40,36 @@
port=6000,
)
@register_statistics(names=["opea_service@embedding_tei_langchain"])
async def embedding(
input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
async def embedding(input: Union[TextDoc, EmbeddingRequest]) -> Union[EmbedDoc, EmbeddingResponse]:
start = time.time()
access_token = (
get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None
)
async_client = get_async_inference_client(access_token)
if logflag:
logger.info(input)

if isinstance(input, TextDoc):
embed_vector = await aembed_query(input.text, async_client)
embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
res = EmbedDoc(text=input.text, embedding=embedding_res)
embedding_res = await aembed_query({"input": input.text}, async_client)
embedding_vec = [data["embedding"] for data in embedding_res["data"]]
embedding_vec = embedding_vec[0] if isinstance(input.text, str) else embedding_vec
res = EmbedDoc(text=input.text, embedding=embedding_vec)
else:
embed_vector = await aembed_query(input.input, async_client)
if input.dimensions is not None:
embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]

# for standard openai embedding format
res = EmbeddingResponse(
data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
embedding_res = await aembed_query(
{"input": input.input, "encoding_format": input.encoding_format, "model": input.model, "user": input.user},
async_client,
)

if isinstance(input, ChatCompletionRequest):
input.embedding = res
# keep
res = input
res = EmbeddingResponse(**embedding_res)

statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None)
if logflag:
logger.info(res)
return res


async def aembed_query(
text: Union[str, List[str]], async_client: AsyncInferenceClient, model_kwargs=None, task=None
) -> List[List[float]]:
texts = [text] if isinstance(text, str) else text
response = await aembed_documents(texts, async_client, model_kwargs=model_kwargs, task=task)
return response


async def aembed_documents(
texts: List[str], async_client: AsyncInferenceClient, model_kwargs=None, task=None
) -> List[List[float]]:
texts = [text.replace("\n", " ") for text in texts]
_model_kwargs = model_kwargs or {}
responses = await async_client.post(json={"inputs": texts, **_model_kwargs}, task=task)
return json.loads(responses.decode())
async def aembed_query(request: Dict, async_client: AsyncInferenceClient) -> Union[Dict, List[List[float]]]:
response = await async_client.post(json=request)
return json.loads(response.decode())


def get_async_inference_client(access_token: str) -> AsyncInferenceClient:
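To make the new flow concrete, here is a hedged, standalone sketch of what the updated helper does: it forwards an OpenAI-style request dict to the TEI endpoint via `AsyncInferenceClient.post` and decodes the JSON response. It assumes `TEI_EMBEDDING_ENDPOINT` points at the `/v1/embeddings` route and that the installed `huggingface_hub` version still provides the `post` method used in `embedding_tei.py`.

```python
# Standalone sketch mirroring the updated aembed_query flow in embedding_tei.py.
# Assumptions: TEI_EMBEDDING_ENDPOINT targets the OpenAI-compatible /v1/embeddings
# route, and the installed huggingface_hub version exposes AsyncInferenceClient.post.
import asyncio
import json
import os

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090/v1/embeddings")
    client = AsyncInferenceClient(model=endpoint)
    # The request now carries the OpenAI "input" key instead of TEI's native "inputs".
    raw = await client.post(json={"input": ["What is Deep Learning?"]})
    response = json.loads(raw.decode())
    # OpenAI-style payload: one entry per input under response["data"].
    print([len(item["embedding"]) for item in response["data"]])


asyncio.run(main())
```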
27 changes: 0 additions & 27 deletions comps/embeddings/tei/langchain/local_embedding_768.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/embeddings/test_embeddings_tei_langchain.sh
@@ -24,7 +24,7 @@ function start_service() {
model="BAAI/bge-base-en-v1.5"
unset http_proxy
docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}/v1/embeddings"
tei_service_port=5002
docker run -d --name="test-comps-embedding-tei-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps
sleep 3m