diff --git a/comps/embeddings/tei/llama_index/Dockerfile b/comps/embeddings/tei/llama_index/Dockerfile
new file mode 100644
index 000000000..0f411360c
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/Dockerfile
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    python3 \
+    python3-pip
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/embeddings/tei/llama_index/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/embeddings/tei/llama_index
+
+ENTRYPOINT ["python3", "embedding_tei.py"]
diff --git a/comps/embeddings/tei/llama_index/README.md b/comps/embeddings/tei/llama_index/README.md
new file mode 100644
index 000000000..dd1f5006c
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/README.md
@@ -0,0 +1,146 @@
+# Embeddings Microservice with Llama Index TEI
+
+## 🚀1. Start Microservice with Python (Option 1)
+
+Currently, we provide two ways to implement the embedding service:
+
+1. Build the embedding model **_locally_** on the server, which is faster, but takes up memory on the local server.
+
+2. Build it based on the **_TEI endpoint_**, which provides more flexibility, but may bring some network latency.
+
+For both implementations, you need to install the requirements first.
+
+### 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+### 1.2 Start Embedding Service
+
+You can select one of the following ways to start the embedding service:
+
+#### Start Embedding Service with TEI
+
+First, you need to start a TEI service.
+
+```bash
+your_port=8090
+model="BAAI/bge-large-en-v1.5"
+docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
+```
+
+Then test your TEI service with the following command:
+
+```bash
+curl localhost:$your_port/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+Then start the embedding microservice, pointing it at the TEI endpoint via `TEI_EMBEDDING_ENDPOINT`:
+
+```bash
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
+python embedding_tei.py
+```
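+
+Internally, `embedding_tei.py` wraps the TEI endpoint with llama_index's `TextEmbeddingsInference` class. As a rough standalone illustration (not part of the service; it assumes the TEI container above is listening on port 8090), the same endpoint can be queried directly:
+
+```python
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+# Same wrapper the microservice builds from TEI_EMBEDDING_MODEL_NAME / TEI_EMBEDDING_ENDPOINT.
+embeddings = TextEmbeddingsInference(
+    model_name="BAAI/bge-large-en-v1.5",
+    base_url="http://localhost:8090",
+)
+vector = embeddings.get_text_embedding("What is Deep Learning?")
+print(len(vector))  # bge-large-en-v1.5 produces 1024-dimensional vectors
+```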
+
+#### Start Embedding Service with Local Model
+
+```bash
+python local_embedding.py
+```
+
+## 🚀2. Start Microservice with Docker (Option 2)
+
+### 2.1 Start Embedding Service with TEI
+
+First, you need to start a TEI service.
+
+```bash
+your_port=8090
+model="BAAI/bge-large-en-v1.5"
+docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
+```
+
+Then test your TEI service with the following command:
+
+```bash
+curl localhost:$your_port/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+Export `TEI_EMBEDDING_ENDPOINT` and `TEI_EMBEDDING_MODEL_NAME` for later usage:
+
+```bash
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
+```
+
+### 2.2 Build Docker Image
+
+```bash
+cd ../../../../
+docker build -t opea/embedding-tei-llama-index:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/llama_index/Dockerfile .
+```
+
+### 2.3 Run Docker with CLI
+
+```bash
+docker run -d --name="embedding-tei-llama-index-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei-llama-index:latest
+```
+
+### 2.4 Run Docker with Docker Compose
+
+```bash
+cd comps/embeddings/tei/llama_index
+docker compose -f docker_compose_embedding.yaml up -d
+```
+
+## 🚀3. Consume Embedding Service
+
+### 3.1 Check Service Status
+
+```bash
+curl http://localhost:6000/v1/health_check \
+    -X GET \
+    -H 'Content-Type: application/json'
+```
+
+### 3.2 Consume Embedding Service
+
+Use our basic API.
+
+```bash
+## query with single text
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":"Hello, world!"}' \
+    -H 'Content-Type: application/json'
+
+## query with multiple texts
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":["Hello, world!","How are you?"]}' \
+    -H 'Content-Type: application/json'
+```
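+
+The same request can be sent from Python. The snippet below is a minimal sketch, not part of the service itself: it assumes the `requests` package is installed and that the microservice is reachable at `http://localhost:6000`. The response mirrors the `EmbedDoc` fields `text` and `embedding`.
+
+```python
+import requests
+
+# Single-text query against the basic API; the payload matches the curl example above.
+resp = requests.post(
+    "http://localhost:6000/v1/embeddings",
+    json={"text": "Hello, world!"},
+    headers={"Content-Type": "application/json"},
+)
+resp.raise_for_status()
+doc = resp.json()
+print(doc["text"], len(doc["embedding"]))
+```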
+
+We are also compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
+
+```bash
+## Input single text
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"input":"Hello, world!"}' \
+    -H 'Content-Type: application/json'
+
+## Input multiple texts with parameters
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
+    -H 'Content-Type: application/json'
+```
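+
+For the OpenAI-compatible format, a similar sketch (again assuming `requests` and a service at `http://localhost:6000`) reads the vectors from the `data` list of the `EmbeddingResponse`; the optional `dimensions` field truncates each returned vector.
+
+```python
+import requests
+
+# OpenAI-style payload with the optional "dimensions" parameter.
+resp = requests.post(
+    "http://localhost:6000/v1/embeddings",
+    json={"input": ["Hello, world!", "How are you?"], "dimensions": 100},
+    headers={"Content-Type": "application/json"},
+)
+resp.raise_for_status()
+for item in resp.json()["data"]:
+    print(item["index"], len(item["embedding"]))
+```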
diff --git a/comps/embeddings/tei/llama_index/__init__.py b/comps/embeddings/tei/llama_index/__init__.py
new file mode 100644
index 000000000..916f3a44b
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml b/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml
new file mode 100644
index 000000000..152f5030b
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  embedding:
+    image: opea/embedding-tei-llama-index:latest
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/embeddings/tei/llama_index/embedding_tei.py b/comps/embeddings/tei/llama_index/embedding_tei.py
new file mode 100644
index 000000000..a3ff25a70
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/embedding_tei.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from typing import List, Union
+
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
+from comps.cores.proto.api_protocol import (
+    ChatCompletionRequest,
+    EmbeddingRequest,
+    EmbeddingResponse,
+    EmbeddingResponseData,
+)
+
+logger = CustomLogger("embedding_tei_llamaindex")
+logflag = os.getenv("LOGFLAG", False)
+
+
+@register_microservice(
+    name="opea_service@embedding_tei_llamaindex",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+async def embedding(
+    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
+) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
+    if logflag:
+        logger.info(input)
+    if isinstance(input, TextDoc):
+        embed_vector = await get_embeddings(input.text)
+        embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
+        res = EmbedDoc(text=input.text, embedding=embedding_res)
+    else:
+        embed_vector = await get_embeddings(input.input)
+        if input.dimensions is not None:
+            embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]
+
+        # for standard openai embedding format
+        res = EmbeddingResponse(
+            data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
+        )
+
+        if isinstance(input, ChatCompletionRequest):
+            # keep the original request and attach the embedding result to it
+            input.embedding = res
+            res = input
+
+    if logflag:
+        logger.info(res)
+    return res
+
+
+async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
+    texts = [text] if isinstance(text, str) else text
+    embed_vector = await embeddings._aget_text_embeddings(texts)
+    return embed_vector
+
+
+if __name__ == "__main__":
+    tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-base-en-v1.5")
+    tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090")
+    embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint)
+    logger.info("TEI embedding service initialized.")
+    opea_microservices["opea_service@embedding_tei_llamaindex"].start()
diff --git a/comps/embeddings/tei/llama_index/local_embedding.py b/comps/embeddings/tei/llama_index/local_embedding.py
new file mode 100644
index 000000000..ba9d3dd5a
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/local_embedding.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding
+
+from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
+
+logger = CustomLogger("local_embedding")
+logflag = os.getenv("LOGFLAG", False)
+
+
+@register_microservice(
+    name="opea_service@local_embedding",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+async def embedding(input: TextDoc) -> EmbedDoc:
+    if logflag:
+        logger.info(input)
+    embed_vector = await embeddings.aget_query_embedding(input.text)
+    res = EmbedDoc(text=input.text, embedding=embed_vector)
+    if logflag:
+        logger.info(res)
+    return res
+
+
+if __name__ == "__main__":
+    embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-base-en-v1.5")
+    opea_microservices["opea_service@local_embedding"].start()
diff --git a/comps/embeddings/tei/llama_index/requirements.txt b/comps/embeddings/tei/llama_index/requirements.txt
new file mode 100644
index 000000000..4f1457e4a
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/requirements.txt
@@ -0,0 +1,11 @@
+docarray[full]
+fastapi
+huggingface_hub
+llama-index-embeddings-huggingface-api
+llama-index-embeddings-text-embeddings-inference
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+prometheus-fastapi-instrumentator
+shortuuid
+uvicorn