diff --git a/comps/embeddings/tei/llama_index/Dockerfile b/comps/embeddings/tei/llama_index/Dockerfile
new file mode 100644
index 000000000..0f411360c
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/Dockerfile
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    python3 \
+    python3-pip
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+
+COPY comps /home/user/comps
+
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/comps/embeddings/tei/llama_index/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/embeddings/tei/llama_index
+
+ENTRYPOINT ["python3", "embedding_tei.py"]
diff --git a/comps/embeddings/tei/llama_index/README.md b/comps/embeddings/tei/llama_index/README.md
new file mode 100644
index 000000000..dd1f5006c
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/README.md
@@ -0,0 +1,146 @@
+# Embeddings Microservice with Llama Index TEI
+
+## 🚀1. Start Microservice with Python (Option 1)
+
+Currently, we provide two ways to implement the embedding service:
+
+1. Build the embedding model **_locally_** on the server, which is faster, but takes up memory on the local server.
+
+2. Build it based on the **_TEI endpoint_**, which provides more flexibility, but may bring some network latency.
+
+For both implementations, you need to install the requirements first.
+
+### 1.1 Install Requirements
+
+```bash
+pip install -r requirements.txt
+```
+
+### 1.2 Start Embedding Service
+
+You can select one of the following ways to start the embedding service:
+
+#### Start Embedding Service with TEI
+
+First, you need to start a TEI service.
+
+```bash
+your_port=8090
+model="BAAI/bge-large-en-v1.5"
+docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
+```
+
+Then test your TEI service with the following command:
+
+```bash
+curl localhost:$your_port/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+Then start the embedding microservice, pointing it at the TEI endpoint via `TEI_EMBEDDING_ENDPOINT`:
+
+```bash
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
+python embedding_tei.py
+```
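+
+Internally, `embedding_tei.py` wraps the TEI endpoint with llama_index's `TextEmbeddingsInference` class. As a rough standalone illustration (not part of the service; it assumes the TEI container above is listening on port 8090), the same endpoint can be queried directly:
+
+```python
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+# Same wrapper the microservice builds from TEI_EMBEDDING_MODEL_NAME / TEI_EMBEDDING_ENDPOINT.
+embeddings = TextEmbeddingsInference(
+    model_name="BAAI/bge-large-en-v1.5",
+    base_url="http://localhost:8090",
+)
+vector = embeddings.get_text_embedding("What is Deep Learning?")
+print(len(vector))  # bge-large-en-v1.5 produces 1024-dimensional vectors
+```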
+
+#### Start Embedding Service with Local Model
+
+```bash
+python local_embedding.py
+```
+
+## 🚀2. Start Microservice with Docker (Option 2)
+
+### 2.1 Start Embedding Service with TEI
+
+First, you need to start a TEI service.
+
+```bash
+your_port=8090
+model="BAAI/bge-large-en-v1.5"
+docker run -p $your_port:80 -v ./data:/data --name tei_server -e http_proxy=$http_proxy -e https_proxy=$https_proxy --pull always ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 --model-id $model
+```
+
+Then test your TEI service with the following command:
+
+```bash
+curl localhost:$your_port/embed \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?"}' \
+    -H 'Content-Type: application/json'
+```
+
+Export `TEI_EMBEDDING_ENDPOINT` and `TEI_EMBEDDING_MODEL_NAME` for later usage:
+
+```bash
+export TEI_EMBEDDING_ENDPOINT="http://localhost:$your_port"
+export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
+```
+
+### 2.2 Build Docker Image
+
+```bash
+cd ../../../../
+docker build -t opea/embedding-tei-llama-index:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/llama_index/Dockerfile .
+```
+
+### 2.3 Run Docker with CLI
+
+```bash
+docker run -d --name="embedding-tei-llama-index-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei-llama-index:latest
+```
+
+### 2.4 Run Docker with Docker Compose
+
+```bash
+cd comps/embeddings/tei/llama_index
+docker compose -f docker_compose_embedding.yaml up -d
+```
+
+## 🚀3. Consume Embedding Service
+
+### 3.1 Check Service Status
+
+```bash
+curl http://localhost:6000/v1/health_check \
+    -X GET \
+    -H 'Content-Type: application/json'
+```
+
+### 3.2 Consume Embedding Service
+
+Use our basic API.
+
+```bash
+## query with single text
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":"Hello, world!"}' \
+    -H 'Content-Type: application/json'
+
+## query with multiple texts
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":["Hello, world!","How are you?"]}' \
+    -H 'Content-Type: application/json'
+```
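+
+The same request can be sent from Python. The snippet below is a minimal sketch, not part of the service itself: it assumes the `requests` package is installed and that the microservice is reachable at `http://localhost:6000`. The response mirrors the `EmbedDoc` fields `text` and `embedding`.
+
+```python
+import requests
+
+# Single-text query against the basic API; the payload matches the curl example above.
+resp = requests.post(
+    "http://localhost:6000/v1/embeddings",
+    json={"text": "Hello, world!"},
+    headers={"Content-Type": "application/json"},
+)
+resp.raise_for_status()
+doc = resp.json()
+print(doc["text"], len(doc["embedding"]))
+```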
+
+We are also compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).
+
+```bash
+## Input single text
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"input":"Hello, world!"}' \
+    -H 'Content-Type: application/json'
+
+## Input multiple texts with parameters
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
+    -H 'Content-Type: application/json'
+```
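+
+For the OpenAI-compatible format, a similar sketch (again assuming `requests` and a service at `http://localhost:6000`) reads the vectors from the `data` list of the `EmbeddingResponse`; the optional `dimensions` field truncates each returned vector.
+
+```python
+import requests
+
+# OpenAI-style payload with the optional "dimensions" parameter.
+resp = requests.post(
+    "http://localhost:6000/v1/embeddings",
+    json={"input": ["Hello, world!", "How are you?"], "dimensions": 100},
+    headers={"Content-Type": "application/json"},
+)
+resp.raise_for_status()
+for item in resp.json()["data"]:
+    print(item["index"], len(item["embedding"]))
+```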
diff --git a/comps/embeddings/tei/llama_index/__init__.py b/comps/embeddings/tei/llama_index/__init__.py
new file mode 100644
index 000000000..916f3a44b
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml b/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml
new file mode 100644
index 000000000..152f5030b
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/docker_compose_embedding.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+version: "3.8"
+
+services:
+  embedding:
+    image: opea/embedding-tei-llama-index:latest
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME}
+    restart: unless-stopped
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/embeddings/tei/llama_index/embedding_tei.py b/comps/embeddings/tei/llama_index/embedding_tei.py
new file mode 100644
index 000000000..a3ff25a70
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/embedding_tei.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from typing import List, Union
+
+from llama_index.embeddings.text_embeddings_inference import TextEmbeddingsInference
+
+from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
+from comps.cores.proto.api_protocol import (
+    ChatCompletionRequest,
+    EmbeddingRequest,
+    EmbeddingResponse,
+    EmbeddingResponseData,
+)
+
+logger = CustomLogger("embedding_tei_llamaindex")
+logflag = os.getenv("LOGFLAG", False)
+
+
+@register_microservice(
+    name="opea_service@embedding_tei_llamaindex",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+async def embedding(
+    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
+) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
+    if logflag:
+        logger.info(input)
+    if isinstance(input, TextDoc):
+        embed_vector = await get_embeddings(input.text)
+        embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
+        res = EmbedDoc(text=input.text, embedding=embedding_res)
+    else:
+        embed_vector = await get_embeddings(input.input)
+        if input.dimensions is not None:
+            embed_vector = [embed_vector[i][: input.dimensions] for i in range(len(embed_vector))]
+
+        # for standard openai embedding format
+        res = EmbeddingResponse(
+            data=[EmbeddingResponseData(index=i, embedding=embed_vector[i]) for i in range(len(embed_vector))]
+        )
+
+        if isinstance(input, ChatCompletionRequest):
+            # keep the original request and attach the embedding result to it
+            input.embedding = res
+            res = input
+
+    if logflag:
+        logger.info(res)
+    return res
+
+
+async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
+    texts = [text] if isinstance(text, str) else text
+    embed_vector = await embeddings._aget_text_embeddings(texts)
+    return embed_vector
+
+
+if __name__ == "__main__":
+    tei_embedding_model_name = os.getenv("TEI_EMBEDDING_MODEL_NAME", "BAAI/bge-base-en-v1.5")
+    tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT", "http://localhost:8090")
+    embeddings = TextEmbeddingsInference(model_name=tei_embedding_model_name, base_url=tei_embedding_endpoint)
+    logger.info("TEI embedding service initialized.")
+    opea_microservices["opea_service@embedding_tei_llamaindex"].start()
diff --git a/comps/embeddings/tei/llama_index/local_embedding.py b/comps/embeddings/tei/llama_index/local_embedding.py
new file mode 100644
index 000000000..ba9d3dd5a
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/local_embedding.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding
+
+from comps import CustomLogger, EmbedDoc, ServiceType, TextDoc, opea_microservices, register_microservice
+
+logger = CustomLogger("local_embedding")
+logflag = os.getenv("LOGFLAG", False)
+
+
+@register_microservice(
+    name="opea_service@local_embedding",
+    service_type=ServiceType.EMBEDDING,
+    endpoint="/v1/embeddings",
+    host="0.0.0.0",
+    port=6000,
+    input_datatype=TextDoc,
+    output_datatype=EmbedDoc,
+)
+async def embedding(input: TextDoc) -> EmbedDoc:
+    if logflag:
+        logger.info(input)
+    embed_vector = await embeddings.aget_query_embedding(input.text)
+    res = EmbedDoc(text=input.text, embedding=embed_vector)
+    if logflag:
+        logger.info(res)
+    return res
+
+
+if __name__ == "__main__":
+    embeddings = HuggingFaceInferenceAPIEmbedding(model_name="BAAI/bge-base-en-v1.5")
+    opea_microservices["opea_service@local_embedding"].start()
diff --git a/comps/embeddings/tei/llama_index/requirements.txt b/comps/embeddings/tei/llama_index/requirements.txt
new file mode 100644
index 000000000..4f1457e4a
--- /dev/null
+++ b/comps/embeddings/tei/llama_index/requirements.txt
@@ -0,0 +1,11 @@
+docarray[full]
+fastapi
+huggingface_hub
+llama-index-embeddings-huggingface-api
+llama-index-embeddings-text-embeddings-inference
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+prometheus-fastapi-instrumentator
+shortuuid
+uvicorn