diff --git a/comps/embeddings/tei/langchain/README.md b/comps/embeddings/tei/langchain/README.md
index 96163c915..2bbf30cc6 100644
--- a/comps/embeddings/tei/langchain/README.md
+++ b/comps/embeddings/tei/langchain/README.md
@@ -33,26 +33,20 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
 Then you need to test your TEI service using the following commands:

 ```bash
-curl localhost:$your_port/embed \
+curl localhost:$your_port/v1/embeddings \
     -X POST \
-    -d '{"inputs":"What is Deep Learning?"}' \
+    -d '{"input":"What is Deep Learning?"}' \
     -H 'Content-Type: application/json'
 ```

 Start the embedding service with the TEI_EMBEDDING_ENDPOINT.

 ```bash
-export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport/v1/embeddings"
 export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 python embedding_tei.py
 ```

-#### Start Embedding Service with Local Model
-
-```bash
-python local_embedding.py
-```
-
 ## 🚀2. Start Microservice with Docker (Optional 2)

 ### 2.1 Start Embedding Service with TEI
@@ -68,16 +62,16 @@ docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$htt
 Then you need to test your TEI service using the following commands:

 ```bash
-curl localhost:$your_port/embed \
+curl localhost:$your_port/v1/embeddings \
     -X POST \
-    -d '{"inputs":"What is Deep Learning?"}' \
+    -d '{"input":"What is Deep Learning?"}' \
     -H 'Content-Type: application/json'
 ```

 Export the `TEI_EMBEDDING_ENDPOINT` for later usage:

 ```bash
-export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport/v1/embeddings"
 export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
 ```

@@ -113,23 +107,7 @@ curl http://localhost:6000/v1/health_check\

 ### 3.2 Consume Embedding Service

-Use our basic API.
-
-```bash
-## query with single text
-curl http://localhost:6000/v1/embeddings\
-    -X POST \
-    -d '{"text":"Hello, world!"}' \
-    -H 'Content-Type: application/json'
-
-## query with multiple texts
-curl http://localhost:6000/v1/embeddings\
-    -X POST \
-    -d '{"text":["Hello, world!","How are you?"]}' \
-    -H 'Content-Type: application/json'
-```
-
-We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
+The input/output follows [OpenAI API Embeddings](https://platform.openai.com/docs/api-reference/embeddings) format.

 ```bash
 ## Input single text
@@ -141,6 +119,6 @@ curl http://localhost:6000/v1/embeddings\
 ## Input multiple texts with parameters
 curl http://localhost:6000/v1/embeddings\
     -X POST \
-    -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
+    -d '{"input":["Hello, world!","How are you?"], "encoding_format":"base64"}' \
     -H 'Content-Type: application/json'
 ```
diff --git a/comps/embeddings/tei/langchain/embedding_tei.py b/comps/embeddings/tei/langchain/embedding_tei.py
index 20e61196d..e3b58e376 100644
--- a/comps/embeddings/tei/langchain/embedding_tei.py
+++ b/comps/embeddings/tei/langchain/embedding_tei.py
@@ -4,7 +4,7 @@
 import json
 import os
 import time
-from typing import List, Union
+from typing import Dict, List, Union

 from huggingface_hub import AsyncInferenceClient

@@ -19,12 +19,7 @@
     statistics_dict,
 )
 from comps.cores.mega.utils import get_access_token
-from comps.cores.proto.api_protocol import (
-    ChatCompletionRequest,
-    EmbeddingRequest,
-    EmbeddingResponse,
-    EmbeddingResponseData,
-)
+from comps.cores.proto.api_protocol import EmbeddingRequest, EmbeddingResponse, EmbeddingResponseData

 logger = CustomLogger("embedding_tei_langchain")
 logflag = os.getenv("LOGFLAG", False)
@@ -45,9 +40,7 @@
     port=6000,
 )
 @register_statistics(names=["opea_service@embedding_tei_langchain"])
-async def embedding(
-    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
-) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
+async def embedding(input: Union[TextDoc, EmbeddingRequest]) -> Union[EmbedDoc, EmbeddingResponse]:
     start = time.time()
     access_token = (
         get_access_token(TOKEN_URL, CLIENTID, CLIENT_SECRET) if TOKEN_URL and CLIENTID and CLIENT_SECRET else None
@@ -55,24 +48,18 @@ async def embedding(
     async_client = get_async_inference_client(access_token)
     if logflag:
         logger.info(input)
+
     if isinstance(input, TextDoc):
-        embed_vector = await aembed_query(input.text, async_client)
-        embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
-        res = EmbedDoc(text=input.text, embedding=embedding_res)
+        embedding_res = await aembed_query({"input": input.text}, async_client)
+        embedding_vec = [data["embedding"] for data in embedding_res["data"]]
+        embedding_vec = embedding_vec[0] if isinstance(input.text, str) else embedding_vec
+        res = EmbedDoc(text=input.text, embedding=embedding_vec)
     else:
-        embed_vector = await aembed_query(input.input, async_client)
-        if input.dimensions is not None:
-            embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]
-
-        # for standard openai embedding format
-        res = EmbeddingResponse(
-            data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
+        embedding_res = await aembed_query(
+            {"input": input.input, "encoding_format": input.encoding_format, "model": input.model, "user": input.user},
+            async_client,
         )
-
-        if isinstance(input, ChatCompletionRequest):
-            input.embedding = res
-            # keep
-            res = input
+        res = EmbeddingResponse(**embedding_res)

     statistics_dict["opea_service@embedding_tei_langchain"].append_latency(time.time() - start, None)
     if logflag:
@@ -80,21 +67,9 @@ async def embedding(
     return res


-async def aembed_query(
-    text: Union[str, List[str]], async_client: AsyncInferenceClient, model_kwargs=None, task=None
-) -> List[List[float]]:
-    texts = [text] if isinstance(text, str) else text
-    response = await aembed_documents(texts, async_client, model_kwargs=model_kwargs, task=task)
-    return response
-
-
-async def aembed_documents(
-    texts: List[str], async_client: AsyncInferenceClient, model_kwargs=None, task=None
-) -> List[List[float]]:
-    texts = [text.replace("\n", " ") for text in texts]
-    _model_kwargs = model_kwargs or {}
-    responses = await async_client.post(json={"inputs": texts, **_model_kwargs}, task=task)
-    return json.loads(responses.decode())
+async def aembed_query(request: Dict, async_client: AsyncInferenceClient) -> Union[Dict, List[List[float]]]:
+    response = await async_client.post(json=request)
+    return json.loads(response.decode())


 def get_async_inference_client(access_token: str) -> AsyncInferenceClient:
diff --git a/comps/embeddings/tei/langchain/local_embedding_768.py b/comps/embeddings/tei/langchain/local_embedding_768.py
deleted file mode 100644
index dae52299b..000000000
--- a/comps/embeddings/tei/langchain/local_embedding_768.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-
-from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, opea_telemetry, register_microservice
-
-
-@register_microservice(
-    name="opea_service@local_embedding",
-    service_type=ServiceType.EMBEDDING,
-    endpoint="/v1/embeddings",
-    host="0.0.0.0",
-    port=6000,
-    input_datatype=TextDoc,
-    output_datatype=EmbedDoc768,
-)
-@opea_telemetry
-async def embedding(input: TextDoc) -> EmbedDoc768:
-    embed_vector = await embeddings.aembed_query(input.text)
-    res = EmbedDoc768(text=input.text, embedding=embed_vector)
-    return res
-
-
-if __name__ == "__main__":
-    embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
-    opea_microservices["opea_service@local_embedding"].start()
diff --git a/tests/embeddings/test_embeddings_tei_langchain.sh b/tests/embeddings/test_embeddings_tei_langchain.sh
index df2642cf1..7c58deadd 100644
--- a/tests/embeddings/test_embeddings_tei_langchain.sh
+++ b/tests/embeddings/test_embeddings_tei_langchain.sh
@@ -24,7 +24,7 @@ function start_service() {
     model="BAAI/bge-base-en-v1.5"
     unset http_proxy
     docker run -d --name="test-comps-embedding-tei-endpoint" -p $tei_endpoint:80 -v ./data:/data --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
-    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}"
+    export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:${tei_endpoint}/v1/embeddings"
     tei_service_port=5002
     docker run -d --name="test-comps-embedding-tei-server" -e LOGFLAG=True -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p ${tei_service_port}:6000 --ipc=host -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:comps
     sleep 3m
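For reviewers who prefer to sanity-check the new contract from Python rather than curl, below is a minimal client sketch. It is not part of the patch: it assumes the refactored embedding microservice is already running on `localhost:6000` (as in the README and the test script above), that the `requests` library is installed, and that responses follow the OpenAI Embeddings shape the README now references.

```python
# Minimal sketch: exercise the OpenAI-format /v1/embeddings endpoint exposed
# by the refactored microservice. Assumes the service from this patch is
# listening on localhost:6000 and that `requests` is available.
import requests

ENDPOINT = "http://localhost:6000/v1/embeddings"  # adjust host/port as needed

# Single input, mirroring the README's "Input single text" example.
single = requests.post(ENDPOINT, json={"input": "Hello, world!"}, timeout=30)
single.raise_for_status()
# With the default float encoding, each data entry carries a list of floats.
print(len(single.json()["data"][0]["embedding"]))  # embedding dimension

# Batched input with an extra OpenAI-style parameter, mirroring the
# "Input multiple texts with parameters" example (base64-encoded vectors).
batch = requests.post(
    ENDPOINT,
    json={"input": ["Hello, world!", "How are you?"], "encoding_format": "base64"},
    timeout=30,
)
batch.raise_for_status()
print([d["index"] for d in batch.json()["data"]])  # one entry per input text
```

The same payloads can be pointed directly at the TEI container's `/v1/embeddings` route (the value now exported as `TEI_EMBEDDING_ENDPOINT`), since the microservice forwards the request body to TEI largely unchanged.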