diff --git a/.github/code_spell_ignore.txt b/.github/code_spell_ignore.txt index e69de29bb..047d573d2 100644 --- a/.github/code_spell_ignore.txt +++ b/.github/code_spell_ignore.txt @@ -0,0 +1,2 @@ +ModelIn +modelin \ No newline at end of file diff --git a/.github/workflows/_example-workflow.yml b/.github/workflows/_example-workflow.yml index cfed39c95..9b50d9349 100644 --- a/.github/workflows/_example-workflow.yml +++ b/.github/workflows/_example-workflow.yml @@ -77,6 +77,10 @@ jobs: git clone https://github.com/vllm-project/vllm.git cd vllm && git rev-parse HEAD && cd ../ fi + if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then + git clone https://github.com/HabanaAI/vllm-fork.git + cd vllm-fork && git rev-parse HEAD && cd ../ + fi git clone https://github.com/opea-project/GenAIComps.git cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../ diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml new file mode 100644 index 000000000..4972f398d --- /dev/null +++ b/.github/workflows/check-online-doc-build.yml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check Online Document Building +permissions: {} + +on: + pull_request: + branches: [main] + paths: + - "**.md" + - "**.rst" + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v4 + with: + path: GenAIExamples + + - name: Checkout docs + uses: actions/checkout@v4 + with: + repository: opea-project/docs + path: docs + + - name: Build Online Document + shell: bash + run: | + echo "build online doc" + cd docs + bash scripts/build.sh diff --git a/.github/workflows/nightly-docker-build-publish.yml b/.github/workflows/nightly-docker-build-publish.yml index 544c69924..d30562224 100644 --- a/.github/workflows/nightly-docker-build-publish.yml +++ b/.github/workflows/nightly-docker-build-publish.yml @@ -42,7 +42,6 @@ jobs: with: node: gaudi example: ${{ matrix.example }} - inject_commit: true secrets: inherit get-image-list: diff --git a/.github/workflows/pr-path-detection.yml b/.github/workflows/pr-path-detection.yml index 3a1c6b0e7..c314bd614 100644 --- a/.github/workflows/pr-path-detection.yml +++ b/.github/workflows/pr-path-detection.yml @@ -68,7 +68,7 @@ jobs: # echo $url_line url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true if [ "$response" -ne 200 ]; then echo "**********Validation failed, try again**********" response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index d3237e2fe..a922ec031 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -26,7 +26,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export 
no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -227,7 +227,7 @@ For users in China who are unable to download models directly from Huggingface, export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails + export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails ``` 3. Set up other environment variables: @@ -257,12 +257,6 @@ If use vllm for llm backend. docker compose -f compose_vllm.yaml up -d ``` -If use vllm-on-ray for llm backend. - -```bash -docker compose -f compose_vllm_ray.yaml up -d -``` - If you want to enable guardrails microservice in the pipeline, please follow the below command instead: ```bash @@ -351,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid }' ``` - ```bash - #vLLM-on-Ray Service - curl http://${host_ip}:8006/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' - ``` - 5. MegaService ```bash diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml deleted file mode 100644 index 1f067e77e..000000000 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - redis-vector-db: - image: redis/redis-stack:7.2.0-v9 - container_name: redis-vector-db - ports: - - "6379:6379" - - "8001:8001" - dataprep-redis-service: - image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} - container_name: dataprep-redis-server - depends_on: - - redis-vector-db - - tei-embedding-service - ports: - - "6007:6007" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} - TEI_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - tei-embedding-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - container_name: tei-embedding-gaudi-server - ports: - - "8090:80" - volumes: - - "./data:/data" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate - retriever: - image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} - container_name: retriever-redis-server - depends_on: - - redis-vector-db - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - REDIS_URL: redis://redis-vector-db:6379 - REDIS_HOST: redis-vector-db - INDEX_NAME: ${INDEX_NAME} - TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - restart: unless-stopped - tei-reranking-service: - image: 
ghcr.io/huggingface/tei-gaudi:latest - container_name: tei-reranking-gaudi-server - ports: - - "8808:80" - volumes: - - "./data:/data" - runtime: habana - cap_add: - - SYS_NICE - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - MAX_WARMUP_SEQUENCE_LENGTH: 512 - command: --model-id ${RERANK_MODEL_ID} --auto-truncate - vllm-ray-service: - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} - container_name: vllm-ray-gaudi-server - ports: - - "8006:8000" - volumes: - - "./data:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LLM_MODEL_ID} - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True" - chatqna-gaudi-backend-server: - image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} - container_name: chatqna-gaudi-backend-server - depends_on: - - redis-vector-db - - tei-embedding-service - - retriever - - tei-reranking-service - - vllm-ray-service - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server - - EMBEDDING_SERVER_HOST_IP=tei-embedding-service - - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} - - RETRIEVER_SERVICE_HOST_IP=retriever - - RERANK_SERVER_HOST_IP=tei-reranking-service - - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} - - LLM_SERVER_HOST_IP=vllm-ray-service - - LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000} - - LLM_MODEL=${LLM_MODEL_ID} - - LOGFLAG=${LOGFLAG} - ipc: host - restart: always - chatqna-gaudi-ui-server: - image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} - container_name: chatqna-gaudi-ui-server - depends_on: - - chatqna-gaudi-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - ipc: host - restart: always - chatqna-gaudi-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: chatqna-gaudi-nginx-server - depends_on: - - chatqna-gaudi-backend-server - - chatqna-gaudi-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server - - FRONTEND_SERVICE_PORT=5173 - - BACKEND_SERVICE_NAME=chatqna - - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - - BACKEND_SERVICE_PORT=8888 - - DATAPREP_SERVICE_IP=dataprep-redis-service - - DATAPREP_SERVICE_PORT=6007 - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index 31ab0549b..24eb39f98 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export 
HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" ``` @@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: @@ -156,8 +156,6 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash # Example: host_ip="192.168.1.1" export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token" # Example: NGINX_PORT=80 export NGINX_PORT=${your_nginx_port} @@ -168,6 +166,8 @@ Change the `xxx_MODEL_ID` below for your needs. ```bash export http_proxy="Your_HTTP_Proxy" export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service ``` 3. Set up other environment variables: diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 9684c0183..b84fa0796 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -77,12 +77,6 @@ services: dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile extends: chatqna image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest} - llm-vllm-ray-hpu: - build: - context: GenAIComps - dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile - extends: chatqna - image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} dataprep-redis: build: context: GenAIComps diff --git a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh b/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh deleted file mode 100644 index d7d1dbe6b..000000000 --- a/ChatQnA/tests/test_compose_vllm_ray_on_gaudi.sh +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -e -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH/docker_image_build - git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../ - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu nginx" - docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - docker pull ghcr.io/huggingface/tei-gaudi:latest - docker images && sleep 1s -} - -function start_services() { - - cd $WORKPATH/docker_compose/intel/hpu/gaudi - export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - export RERANK_MODEL_ID="BAAI/bge-reranker-base" - export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" - export INDEX_NAME="rag-redis" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - - # Start Docker Containers - docker compose -f compose_vllm_ray.yaml up -d > ${LOG_PATH}/start_services_with_compose.log - n=0 - until [[ "$n" -ge 100 ]]; do - echo "n=$n" - docker logs vllm-ray-gaudi-server > vllm_ray_service_start.log - if grep -q "Warmup finished" vllm_ray_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- - # tei for embedding service - validate_services \ - "${ip_address}:8090/embed" \ - "\[\[" \ - "tei-embedding" \ - "tei-embedding-gaudi-server" \ - '{"inputs":"What is Deep Learning?"}' - - sleep 1m # retrieval can't curl as expected, try to wait for more time - - # retrieval microservice - test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - " " \ - "retrieval" \ - "retriever-redis-server" \ - "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}" - - # tei for rerank microservice - validate_services \ - "${ip_address}:8808/rerank" \ - '{"index":1,"score":' \ - "tei-rerank" \ - "tei-reranking-gaudi-server" \ - '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' - - # vllm-on-ray for llm service - validate_services \ - "${ip_address}:8006/v1/chat/completions" \ - "content" \ - "vllm-ray-llm" \ - "vllm-ray-gaudi-server" \ - '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}' -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/chatqna" \ - "data: " \ - "mega-chatqna" \ - "chatqna-gaudi-backend-server" \ - '{"messages": "What is the revenue of Nike in 2023?"}' - -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_vllm_ray.yaml down -} - -function main() { - - stop_docker - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_time=$(date +%s) - start_services - end_time=$(date +%s) - duration=$((end_time-start_time)) - echo "Mega service start duration is $duration s" - - validate_microservices - validate_megaservice - # validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/EdgeCraftRAG/Dockerfile b/EdgeCraftRAG/Dockerfile new file mode 100644 index 000000000..a15136464 --- /dev/null +++ b/EdgeCraftRAG/Dockerfile @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag +COPY ./chatqna.py /home/user/chatqna.py + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] \ No newline at end of file diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server new file mode 100644 index 000000000..c04dc0a54 --- /dev/null +++ b/EdgeCraftRAG/Dockerfile.server @@ -0,0 +1,35 @@ +FROM python:3.11-slim + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN apt-get update && apt-get install -y gnupg wget +RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg +RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ + tee /etc/apt/sources.list.d/intel-gpu-jammy.list +RUN apt-get update +RUN apt-get install -y \ + intel-opencl-icd intel-level-zero-gpu level-zero intel-level-zero-gpu-raytracing \ + intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ + libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ + libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ + mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ + +USER user + +ENTRYPOINT ["python", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md new file mode 100644 index 000000000..da8d2efb0 --- /dev/null +++ b/EdgeCraftRAG/README.md @@ -0,0 +1,274 @@ +# Edge Craft Retrieval-Augmented Generation + +Edge Craft RAG (EC-RAG) is a customizable, tunable and production-ready +Retrieval-Augmented Generation system for edge solutions. 
It is designed to +curate the RAG pipeline to meet hardware requirements at edge with guaranteed +quality and performance. + +## Quick Start Guide + +### Run Containers with Docker Compose + +```bash +cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc + +export MODEL_PATH="your model path for all your models" +export DOC_PATH="your doc path for uploading a dir of files" +export HOST_IP="your host ip" +export UI_SERVICE_PORT="port for UI service" + +# Optional for vllm endpoint +export vLLM_ENDPOINT="http://${HOST_IP}:8008" + +# If you have a proxy configured, uncomment below line +# export no_proxy=$no_proxy,${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a HF mirror configured, it will be imported to the container +# export HF_ENDPOINT="your HF mirror endpoint" + +# By default, the ports of the containers are set, uncomment if you want to change +# export MEGA_SERVICE_PORT=16011 +# export PIPELINE_SERVICE_PORT=16011 + +docker compose up -d +``` + +### (Optional) Build Docker Images for Mega Service, Server and UI by your own + +```bash +cd GenAIExamples/EdgeCraftRAG + +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag:latest -f Dockerfile . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-server:latest -f Dockerfile.server . +docker build --build-arg http_proxy=$HTTP_PROXY --build-arg https_proxy=$HTTPS_PROXY --build-arg no_proxy=$NO_PROXY -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . +``` + +### ChatQnA with LLM Example (Command Line) + +```bash +cd GenAIExamples/EdgeCraftRAG + +# Activate pipeline test_pipeline_local_llm +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' 
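+# tests/test_pipeline_local_llm.json is not reproduced here; as a rough sketch only
+# (field names follow the PipelineCreateIn schema added in edgecraftrag/api_schema.py,
+# and all values are illustrative), the request body has roughly this shape:
+# {
+#   "name": "rag_test", "active": true,
+#   "node_parser": {"parser_type": "simple", "chunk_size": 400, "chunk_overlap": 48},
+#   "indexer": {"indexer_type": "faiss_vector",
+#               "embedding_model": {"model_id": "BAAI/bge-small-en-v1.5",
+#                                   "model_path": "/home/user/models/bge_ov_embedding",
+#                                   "device": "auto"}},
+#   "retriever": {"retriever_type": "vectorsimilarity", "retrieve_topk": 30},
+#   "postprocessor": [{"processor_type": "reranker", "top_n": 2,
+#                      "reranker_model": {"model_id": "BAAI/bge-reranker-large",
+#                                         "model_path": "/home/user/models/bge_ov_reranker",
+#                                         "device": "auto"}}],
+#   "generator": {"inference_type": "local",
+#                 "model": {"model_id": "qwen2-7b-instruct",
+#                           "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights",
+#                           "device": "auto"}}
+# }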
+ +# Will need to wait for several minutes +# Expected output: +# { +# "idx": "3214cf25-8dff-46e6-b7d1-1811f237cf8c", +# "name": "rag_test", +# "comp_type": "pipeline", +# "node_parser": { +# "idx": "ababed12-c192-4cbb-b27e-e49c76a751ca", +# "parser_type": "simple", +# "chunk_size": 400, +# "chunk_overlap": 48 +# }, +# "indexer": { +# "idx": "46969b63-8a32-4142-874d-d5c86ee9e228", +# "indexer_type": "faiss_vector", +# "model": { +# "idx": "7aae57c0-13a4-4a15-aecb-46c2ec8fe738", +# "type": "embedding", +# "model_id": "BAAI/bge-small-en-v1.5", +# "model_path": "/home/user/models/bge_ov_embedding", +# "device": "auto" +# } +# }, +# "retriever": { +# "idx": "3747fa59-ff9b-49b6-a8e8-03cdf8c979a4", +# "retriever_type": "vectorsimilarity", +# "retrieve_topk": 30 +# }, +# "postprocessor": [ +# { +# "idx": "d46a6cae-ba7a-412e-85b7-d334f175efaa", +# "postprocessor_type": "reranker", +# "model": { +# "idx": "374e7471-bd7d-41d0-b69d-a749a052b4b0", +# "type": "reranker", +# "model_id": "BAAI/bge-reranker-large", +# "model_path": "/home/user/models/bge_ov_reranker", +# "device": "auto" +# }, +# "top_n": 2 +# } +# ], +# "generator": { +# "idx": "52d8f112-6290-4dd3-bc28-f9bd5deeb7c8", +# "generator_type": "local", +# "model": { +# "idx": "fa0c11e1-46d1-4df8-a6d8-48cf6b99eff3", +# "type": "llm", +# "model_id": "qwen2-7b-instruct", +# "model_path": "/home/user/models/qwen2-7b-instruct/INT4_compressed_weights", +# "device": "auto" +# } +# }, +# "status": { +# "active": true +# } +# } + +# Prepare data from local directory +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"#REPLACE WITH YOUR LOCAL DOC DIR#"}' | jq '.' + +# Validate Mega Service +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' +``` + +### ChatQnA with LLM Example (UI) + +Open your browser, access http://${HOST_IP}:8082 + +> Your browser should be running on the same host of your console, otherwise you will need to access UI with your host domain name instead of ${HOST_IP}. + +### (Optional) Launch vLLM with OpenVINO service + +```bash +# 1. export LLM_MODEL +export LLM_MODEL="your model id" +# 2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml' + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # depends_on: + # - vllm-service + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" +``` + +## Advanced User Guide + +### Pipeline Management + +#### Create a pipeline + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline.json | jq '.' +``` + +It will take some time to prepare the embedding model. + +#### Upload a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Provide a query to retrieve context with similarity search. + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d @examples/test_query.json | jq '.' 
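+# examples/test_query.json is not shown in this diff; judging from the inline retrieval
+# calls further down in this guide, a minimal query body only needs a "messages" field, e.g.:
+# {"messages": "#REPLACE WITH YOUR QUESTION HERE#"}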
+``` + +#### Create the second pipeline test2 + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @examples/test_pipeline2.json | jq '.' +``` + +#### Check all pipelines + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.' +``` + +#### Compare similarity retrieval (test1) and keyword retrieval (test2) + +```bash +# Activate pipeline test1 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Similarity retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +# Activate pipeline test2 +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test2 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +# Keyword retrieval +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"number"}' | jq '.' + +``` + +### Model Management + +#### Load a model + +```bash +curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d @examples/test_model_load.json | jq '.' +``` + +It will take some time to load the model. + +#### Check all models + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" | jq '.' +``` + +#### Update a model + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d @examples/test_model_update.json | jq '.' +``` + +#### Check a certain model + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a model + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +``` + +### File Management + +#### Add a text + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data.json | jq '.' +``` + +#### Add files from existed file path + +```bash +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_dir.json | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' +``` + +#### Check all files + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files -H "Content-Type: application/json" | jq '.' +``` + +#### Check one file + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Delete a file + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +``` + +#### Update a file + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d @examples/test_data_file.json | jq '.' 
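+# examples/test_data_file.json is not included in this diff; based on the DataIn schema
+# added in edgecraftrag/api_schema.py ("text" and "local_path" fields), it is expected to
+# look roughly like the following, with an illustrative path:
+# {"local_path": "/home/user/docs/test.pdf"}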
+``` diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py new file mode 100644 index 000000000..1afa9621c --- /dev/null +++ b/EdgeCraftRAG/chatqna.py @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1") +PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) + +from comps import Gateway, MegaServiceEndpoint +from comps.cores.proto.api_protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ChatCompletionResponseChoice, + ChatMessage, + UsageInfo, +) +from fastapi import Request +from fastapi.responses import StreamingResponse + + +class EdgeCraftRagGateway(Gateway): + def __init__(self, megaservice, host="0.0.0.0", port=16011): + super().__init__( + megaservice, host, port, str(MegaServiceEndpoint.CHAT_QNA), ChatCompletionRequest, ChatCompletionResponse + ) + + async def handle_request(self, request: Request): + input = await request.json() + result_dict, runtime_graph = await self.megaservice.schedule(initial_inputs=input) + for node, response in result_dict.items(): + if isinstance(response, StreamingResponse): + return response + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="edgecraftrag", choices=choices, usage=usage) + + +class EdgeCraftRagService: + def __init__(self, host="0.0.0.0", port=16010): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + edgecraftrag = MicroService( + name="pipeline", + host=PIPELINE_SERVICE_HOST_IP, + port=PIPELINE_SERVICE_PORT, + endpoint="/v1/chatqna", + use_remote_service=True, + service_type=ServiceType.UNDEFINED, + ) + self.megaservice.add(edgecraftrag) + self.gateway = EdgeCraftRagGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + edgecraftrag = EdgeCraftRagService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + edgecraftrag.add_remote_service() diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml new file mode 100644 index 000000000..f877b7c58 --- /dev/null +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + container_name: edgecraftrag-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_ENDPOINT: ${HF_ENDPOINT} + vLLM_ENDPOINT: ${vLLM_ENDPOINT} + volumes: + - ${MODEL_PATH:-${PWD}}:/home/user/models + - ${DOC_PATH:-${PWD}}:/home/user/docs + ports: + - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} + devices: + - /dev/dri:/dev/dri + group_add: + - video + ecrag: + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} + container_name: edgecraftrag + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: 
${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + ports: + - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} + depends_on: + - server + ui: + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + container_name: edgecraftrag-ui + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} + PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} + PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} + UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} + UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} + ports: + - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} + restart: always + depends_on: + - server + - ecrag + # vllm-service: + # image: vllm:openvino + # container_name: vllm-openvino-server + # ports: + # - "8008:80" + # environment: + # no_proxy: ${no_proxy} + # http_proxy: ${http_proxy} + # https_proxy: ${https_proxy} + # vLLM_ENDPOINT: ${vLLM_ENDPOINT} + # LLM_MODEL: ${LLM_MODEL} + # entrypoint: /bin/bash -c "\ + # cd / && \ + # export VLLM_CPU_KVCACHE_SPACE=50 && \ + # python3 -m vllm.entrypoints.openai.api_server \ + # --model '${LLM_MODEL}' \ + # --host 0.0.0.0 \ + # --port 80" + +networks: + default: + driver: bridge diff --git a/EdgeCraftRAG/docker_image_build/build.yaml b/EdgeCraftRAG/docker_image_build/build.yaml new file mode 100644 index 000000000..e0cc355cc --- /dev/null +++ b/EdgeCraftRAG/docker_image_build/build.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + server: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile.server + image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} + ui: + build: + context: .. + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./ui/docker/Dockerfile.ui + image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} + ecrag: + build: + context: .. 
+ args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} diff --git a/EdgeCraftRAG/edgecraftrag/__init__.py b/EdgeCraftRAG/edgecraftrag/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/__init__.py b/EdgeCraftRAG/edgecraftrag/api/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py new file mode 100644 index 000000000..dfd32c29e --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.context import ctx +from fastapi import FastAPI + +chatqna_app = FastAPI() + + +# Retrieval +@chatqna_app.post(path="/v1/retrieval") +async def retrieval(request: ChatCompletionRequest): + nodeswithscore = ctx.get_pipeline_mgr().run_retrieve(chat_request=request) + print(nodeswithscore) + if nodeswithscore is not None: + ret = [] + for n in nodeswithscore: + ret.append((n.node.node_id, n.node.text, n.score)) + return ret + + return "Not found" + + +# ChatQnA +@chatqna_app.post(path="/v1/chatqna") +async def chatqna(request: ChatCompletionRequest): + ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + return str(ret) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py new file mode 100644 index 000000000..fb5b32792 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import DataIn, FilesIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +data_app = FastAPI() + + +# Upload a text or files +@data_app.post(path="/v1/data") +async def add_data(request: DataIn): + nodelist = None + + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: Need bug fix, when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# Upload files by a list of file_path +@data_app.post(path="/v1/data/files") +async def add_files(request: FilesIn): + nodelist = None + + docs = [] + if request.local_paths is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_paths)) + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if nodelist is None: + return "Error" + pl = 
ctx.get_pipeline_mgr().get_active_pipeline() + # TODO: Need bug fix, when node_parser is None + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return "Done" + + +# GET files +@data_app.get(path="/v1/data/files") +async def get_files(): + return ctx.get_file_mgr().get_files() + + +# GET a file +@data_app.get(path="/v1/data/files") +async def get_file_docs(name): + return ctx.get_file_mgr().get_docs_by_file(name) + + +# DELETE a file +@data_app.delete(path="/v1/data/files/{name}") +async def delete_file(name): + if ctx.get_file_mgr().del_file(name): + # TODO: delete the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is deleted" + else: + return f"File {name} not found" + + +# UPDATE a file +@data_app.patch(path="/v1/data/files/{name}") +async def update_file(name, request: DataIn): + # 1. Delete + if ctx.get_file_mgr().del_file(name): + # 2. Add + docs = [] + if request.text is not None: + docs.extend(ctx.get_file_mgr().add_text(text=request.text)) + if request.local_path is not None: + docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) + + # 3. Re-run the pipeline + # TODO: update the nodes related to the file + all_docs = ctx.get_file_mgr().get_all_docs() + nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) + if nodelist is None: + return "Error" + pl = ctx.get_pipeline_mgr().get_active_pipeline() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) + return f"File {name} is updated" + else: + return f"File {name} not found" diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py new file mode 100644 index 000000000..17044ae91 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import gc + +from edgecraftrag.api_schema import ModelIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +model_app = FastAPI() + + +# GET Models +@model_app.get(path="/v1/settings/models") +async def get_models(): + return ctx.get_model_mgr().get_models() + + +# GET Model +@model_app.get(path="/v1/settings/models/{model_id:path}") +async def get_model_by_name(model_id): + return ctx.get_model_mgr().get_model_by_name(model_id) + + +# POST Model +@model_app.post(path="/v1/settings/models") +async def add_model(request: ModelIn): + modelmgr = ctx.get_model_mgr() + # Currently use asyncio.Lock() to deal with multi-requests + async with modelmgr._lock: + model = modelmgr.search_model(request) + if model is None: + model = modelmgr.load_model(request) + modelmgr.add(model) + return model.model_id + " model loaded" + + +# PATCH Model +@model_app.patch(path="/v1/settings/models/{model_id:path}") +async def update_model(model_id, request: ModelIn): + # The process of patch model is : 1.delete model 2.create model + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + modelmgr = ctx.get_model_mgr() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by active pipeline, unable to update model" + else: + async with modelmgr._lock: + if modelmgr.get_model_by_name(model_id) is None: 
+ # Need to make sure original model still exists before updating model + # to prevent memory leak in concurrent requests situation + return "Model " + model_id + " not exists" + model = modelmgr.search_model(request) + if model is None: + modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + # load new model + model = modelmgr.load_model(request) + modelmgr.add(model) + return model + + +# DELETE Model +@model_app.delete(path="/v1/settings/models/{model_id:path}") +async def delete_model(model_id): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if active_pl and active_pl.model_existed(model_id): + return "Model is being used by active pipeline, unable to remove" + else: + modelmgr = ctx.get_model_mgr() + # Currently use asyncio.Lock() to deal with multi-requests + async with modelmgr._lock: + response = modelmgr.del_model_by_name(model_id) + # Clean up memory occupation + gc.collect() + return response diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py new file mode 100644 index 000000000..9d008e82f --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -0,0 +1,180 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import weakref + +from edgecraftrag.api_schema import PipelineCreateIn +from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType +from edgecraftrag.components.generator import QnAGenerator +from edgecraftrag.components.indexer import VectorIndexer +from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser +from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor +from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.context import ctx +from fastapi import FastAPI + +pipeline_app = FastAPI() + + +# GET Pipelines +@pipeline_app.get(path="/v1/settings/pipelines") +async def get_pipelines(): + return ctx.get_pipeline_mgr().get_pipelines() + + +# GET Pipeline +@pipeline_app.get(path="/v1/settings/pipelines/{name}") +async def get_pipeline(name): + return ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + + +# POST Pipeline +@pipeline_app.post(path="/v1/settings/pipelines") +async def add_pipeline(request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(request.name) + if pl is None: + pl = ctx.get_pipeline_mgr().create_pipeline(request.name) + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..." + update_pipeline_handler(pl, request) + return pl + + +# PATCH Pipeline +@pipeline_app.patch(path="/v1/settings/pipelines/{name}") +async def update_pipeline(name, request: PipelineCreateIn): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + if pl is None: + return None + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if pl == active_pl: + if not request.active: + pass + else: + return "Unable to patch an active pipeline..." 
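+    # Pipeline updates are serialized behind the pipeline manager lock; update_pipeline_handler
+    # below rebuilds node parser, indexer, retriever, postprocessors and generator from the
+    # request, reusing already-registered components and models where they match.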
+ async with ctx.get_pipeline_mgr()._lock: + update_pipeline_handler(pl, request) + return pl + + +def update_pipeline_handler(pl, req): + if req.node_parser is not None: + np = req.node_parser + found_parser = ctx.get_node_parser_mgr().search_parser(np) + if found_parser is not None: + pl.node_parser = found_parser + else: + match np.parser_type: + case NodeParserType.SIMPLE: + pl.node_parser = SimpleNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.HIERARCHY: + """ + HierarchyNodeParser is for Auto Merging Retriever + (https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/) + By default, the hierarchy is: + 1st level: chunk size 2048 + 2nd level: chunk size 512 + 3rd level: chunk size 128 + Please set chunk size with List. e.g. chunk_size=[2048,512,128] + """ + pl.node_parser = HierarchyNodeParser.from_defaults( + chunk_sizes=np.chunk_sizes, chunk_overlap=np.chunk_overlap + ) + case NodeParserType.SENTENCEWINDOW: + pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) + ctx.get_node_parser_mgr().add(pl.node_parser) + + if req.indexer is not None: + ind = req.indexer + found_indexer = ctx.get_indexer_mgr().search_indexer(ind) + if found_indexer is not None: + pl.indexer = found_indexer + else: + embed_model = None + if ind.embedding_model: + embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) + if embed_model is None: + ind.embedding_model.model_type = ModelType.EMBEDDING + embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) + ctx.get_model_mgr().add(embed_model) + match ind.indexer_type: + case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR: + # TODO: **RISK** if considering 2 pipelines with different + # nodes, but same indexer, what will happen? 
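+                    # VectorIndexer (edgecraftrag/components/indexer.py) wraps a LlamaIndex
+                    # VectorStoreIndex built on the configured embedding model, FAISS-backed
+                    # for the faiss_vector indexer type.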
+ pl.indexer = VectorIndexer(embed_model, ind.indexer_type) + case _: + pass + ctx.get_indexer_mgr().add(pl.indexer) + + if req.retriever is not None: + retr = req.retriever + match retr.retriever_type: + case RetrieverType.VECTORSIMILARITY: + if pl.indexer is not None: + pl.retriever = VectorSimRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case RetrieverType.AUTOMERGE: + # AutoMergeRetriever looks at a set of leaf nodes and recursively "merges" subsets of leaf nodes that reference a parent node + if pl.indexer is not None: + pl.retriever = AutoMergeRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case RetrieverType.BM25: + if pl.indexer is not None: + pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk) + else: + return "No indexer" + case _: + pass + + if req.postprocessor is not None: + pp = req.postprocessor + pl.postprocessor = [] + for processor in pp: + match processor.processor_type: + case PostProcessorType.RERANKER: + if processor.reranker_model: + prm = processor.reranker_model + reranker_model = ctx.get_model_mgr().search_model(prm) + if reranker_model is None: + prm.model_type = ModelType.RERANKER + reranker_model = ctx.get_model_mgr().load_model(prm) + ctx.get_model_mgr().add(reranker_model) + postprocessor = RerankProcessor(reranker_model, processor.top_n) + pl.postprocessor.append(postprocessor) + else: + return "No reranker model" + case PostProcessorType.METADATAREPLACE: + postprocessor = MetadataReplaceProcessor(target_metadata_key="window") + pl.postprocessor.append(postprocessor) + + if req.generator: + gen = req.generator + if gen.model is None: + return "No ChatQnA Model" + if gen.inference_type == InferenceType.VLLM: + if gen.model.model_id: + model_ref = gen.model.model_id + else: + model_ref = gen.model.model_path + pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type) + elif gen.inference_type == InferenceType.LOCAL: + model = ctx.get_model_mgr().search_model(gen.model) + if model is None: + gen.model.model_type = ModelType.LLM + model = ctx.get_model_mgr().load_model(gen.model) + ctx.get_model_mgr().add(model) + # Use weakref to achieve model deletion and memory release + model_ref = weakref.ref(model) + pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type) + else: + return "Inference Type Not Supported" + + if pl.status.active != req.active: + ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr()) + return pl diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py new file mode 100644 index 000000000..1f124a7f9 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Optional + +from pydantic import BaseModel + + +class ModelIn(BaseModel): + model_type: Optional[str] = "LLM" + model_id: Optional[str] + model_path: Optional[str] = "./" + device: Optional[str] = "cpu" + + +class NodeParserIn(BaseModel): + chunk_size: Optional[int] = None + chunk_overlap: Optional[int] = None + chunk_sizes: Optional[list] = None + parser_type: str + window_size: Optional[int] = None + + +class IndexerIn(BaseModel): + indexer_type: str + embedding_model: Optional[ModelIn] = None + + +class RetrieverIn(BaseModel): + retriever_type: str + retrieve_topk: Optional[int] = 3 + + +class PostProcessorIn(BaseModel): + processor_type: str + 
reranker_model: Optional[ModelIn] = None + top_n: Optional[int] = 5 + + +class GeneratorIn(BaseModel): + prompt_path: Optional[str] = None + model: Optional[ModelIn] = None + inference_type: Optional[str] = "local" + + +class PipelineCreateIn(BaseModel): + name: Optional[str] = None + node_parser: Optional[NodeParserIn] = None + indexer: Optional[IndexerIn] = None + retriever: Optional[RetrieverIn] = None + postprocessor: Optional[list[PostProcessorIn]] = None + generator: Optional[GeneratorIn] = None + active: Optional[bool] = False + + +class DataIn(BaseModel): + text: Optional[str] = None + local_path: Optional[str] = None + + +class FilesIn(BaseModel): + local_paths: Optional[list[str]] = None diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py new file mode 100644 index 000000000..d8c7aaef8 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/base.py @@ -0,0 +1,128 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import abc +import uuid +from enum import Enum +from typing import Any, Callable, List, Optional + +from pydantic import BaseModel, ConfigDict, Field, model_serializer + + +class CompType(str, Enum): + + DEFAULT = "default" + MODEL = "model" + PIPELINE = "pipeline" + NODEPARSER = "node_parser" + INDEXER = "indexer" + RETRIEVER = "retriever" + POSTPROCESSOR = "postprocessor" + GENERATOR = "generator" + FILE = "file" + + +class ModelType(str, Enum): + + EMBEDDING = "embedding" + RERANKER = "reranker" + LLM = "llm" + + +class FileType(str, Enum): + TEXT = "text" + VISUAL = "visual" + AURAL = "aural" + VIRTUAL = "virtual" + OTHER = "other" + + +class NodeParserType(str, Enum): + + DEFAULT = "default" + SIMPLE = "simple" + HIERARCHY = "hierarchical" + SENTENCEWINDOW = "sentencewindow" + + +class IndexerType(str, Enum): + + DEFAULT = "default" + FAISS_VECTOR = "faiss_vector" + DEFAULT_VECTOR = "vector" + + +class RetrieverType(str, Enum): + + DEFAULT = "default" + VECTORSIMILARITY = "vectorsimilarity" + AUTOMERGE = "auto_merge" + BM25 = "bm25" + + +class PostProcessorType(str, Enum): + + RERANKER = "reranker" + METADATAREPLACE = "metadata_replace" + + +class GeneratorType(str, Enum): + + CHATQNA = "chatqna" + + +class InferenceType(str, Enum): + + LOCAL = "local" + VLLM = "vllm" + + +class CallbackType(str, Enum): + + DATAPREP = "dataprep" + RETRIEVE = "retrieve" + PIPELINE = "pipeline" + + +class BaseComponent(BaseModel): + + model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True) + + idx: str = Field(default_factory=lambda: str(uuid.uuid4())) + name: Optional[str] = Field(default="") + comp_type: str = Field(default="") + comp_subtype: Optional[str] = Field(default="") + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "name": self.name, + "comp_type": self.comp_type, + "comp_subtype": self.comp_subtype, + } + return set + + @abc.abstractmethod + def run(self, **kwargs) -> Any: + pass + + +class BaseMgr: + + def __init__(self): + self.components = {} + + def add(self, comp: BaseComponent): + self.components[comp.idx] = comp + + def get(self, idx: str) -> BaseComponent: + if idx in self.components: + return self.components[idx] + else: + return None + + def remove(self, idx): + # remove the reference count + # after reference count == 0, object memory can be freed with Garbage Collector + del self.components[idx] diff --git a/EdgeCraftRAG/edgecraftrag/components/__init__.py b/EdgeCraftRAG/edgecraftrag/components/__init__.py new file mode 100644 index 000000000..916f3a44b --- 
/dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/components/data.py b/EdgeCraftRAG/edgecraftrag/components/data.py new file mode 100644 index 000000000..e7fa19e7a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/data.py @@ -0,0 +1,65 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import Any, List, Optional + +from edgecraftrag.base import BaseComponent, CompType, FileType +from llama_index.core.schema import Document +from pydantic import BaseModel, Field, model_serializer + + +class File(BaseComponent): + file_path: str = Field(default="") + comp_subtype: str = Field(default="") + documents: List[Document] = Field(default=[]) + + def __init__(self, file_name: Optional[str] = None, file_path: Optional[str] = None, content: Optional[str] = None): + super().__init__(comp_type=CompType.FILE) + + if not file_name and not file_path: + raise ValueError("File name or path must be provided") + + _path = Path(file_path) if file_path else None + if file_name: + self.name = file_name + else: + self.name = _path.name + self.file_path = _path + self.comp_subtype = FileType.TEXT + if _path and _path.exists(): + self.documents.extend(convert_file_to_documents(_path)) + if content: + self.documents.extend(convert_text_to_documents(content)) + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "file_name": self.name, + "file_id": self.idx, + "file_type": self.comp_subtype, + "file_path": str(self.file_path), + "docs_count": len(self.documents), + } + return set + + +def convert_text_to_documents(text) -> List[Document]: + return [Document(text=text, metadata={"file_name": "text"})] + + +def convert_file_to_documents(file_path) -> List[Document]: + from llama_index.core import SimpleDirectoryReader + + supported_exts = [".pdf", ".txt", ".doc", ".docx", ".pptx", ".ppt", ".csv", ".md", ".html", ".rst"] + if file_path.is_dir(): + docs = SimpleDirectoryReader(input_dir=file_path, recursive=True, required_exts=supported_exts).load_data() + elif file_path.is_file(): + docs = SimpleDirectoryReader(input_files=[file_path], required_exts=supported_exts).load_data() + else: + docs = [] + + return docs diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py new file mode 100644 index 000000000..cbfd6686d --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -0,0 +1,194 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import dataclasses +import os + +from comps import GeneratedDoc, opea_telemetry +from edgecraftrag.base import BaseComponent, CompType, GeneratorType +from fastapi.responses import StreamingResponse +from langchain_core.prompts import PromptTemplate +from llama_index.llms.openai_like import OpenAILike +from pydantic import model_serializer + + +@opea_telemetry +def post_process_text(text: str): + if text == " ": + return "data: @#$\n\n" + if text == "\n": + return "data:
\n\n" + if text.isspace(): + return None + new_text = text.replace(" ", "@#$") + return f"data: {new_text}\n\n" + + +class QnAGenerator(BaseComponent): + + def __init__(self, llm_model, prompt_template, inference_type, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.GENERATOR, + comp_subtype=GeneratorType.CHATQNA, + ) + self.inference_type = inference_type + self._REPLACE_PAIRS = ( + ("\n\n", "\n"), + ("\t\n", "\n"), + ) + template = prompt_template + self.prompt = ( + DocumentedContextRagPromptTemplate.from_file(template) + if os.path.isfile(template) + else DocumentedContextRagPromptTemplate.from_template(template) + ) + self.llm = llm_model + if isinstance(llm_model, str): + self.model_id = llm_model + else: + self.model_id = llm_model().model_id + + def clean_string(self, string): + ret = string + for p in self._REPLACE_PAIRS: + ret = ret.replace(*p) + return ret + + def run(self, chat_request, retrieved_nodes, **kwargs): + if self.llm() is None: + # This could happen when User delete all LLMs through RESTful API + return "No LLM available, please load LLM" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + generate_kwargs = dict( + temperature=chat_request.temperature, + do_sample=chat_request.temperature > 0.0, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + typical_p=chat_request.typical_p, + repetition_penalty=chat_request.repetition_penalty, + ) + self.llm().generate_kwargs = generate_kwargs + + return self.llm().complete(prompt_str) + + def run_vllm(self, chat_request, retrieved_nodes, **kwargs): + if self.llm is None: + return "No LLM provided, please provide model_id_or_path" + # query transformation + text_gen_context = "" + for n in retrieved_nodes: + origin_text = n.node.get_text() + text_gen_context += self.clean_string(origin_text.strip()) + + query = chat_request.messages + prompt_str = self.prompt.format(input=query, context=text_gen_context) + + llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") + model_name = self.llm + llm = OpenAILike( + api_key="fake", + api_base=llm_endpoint + "/v1", + max_tokens=chat_request.max_tokens, + model=model_name, + top_p=chat_request.top_p, + temperature=chat_request.temperature, + streaming=chat_request.stream, + ) + + if chat_request.stream: + + async def stream_generator(): + response = await llm.astream_complete(prompt_str) + async for text in response: + output = text.text + yield f"data: {output}\n\n" + + yield "data: [DONE]\n\n" + + return StreamingResponse(stream_generator(), media_type="text/event-stream") + else: + response = llm.complete(prompt_str) + response = response.text + + return GeneratedDoc(text=response, prompt=prompt_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "generator_type": self.comp_subtype, "model": self.model_id} + return set + + +@dataclasses.dataclass +class INSTRUCTIONS: + IM_START = "You are an AI assistant that helps users answer questions given a specific context." + SUCCINCT = "Ensure your response is succinct" + ACCURATE = "Ensure your response is accurate." + SUCCINCT_AND_ACCURATE = "Ensure your response is succinct. Try to be accurate if possible." + ACCURATE_AND_SUCCINCT = "Ensure your response is accurate. Try to be succinct if possible." 
+ NO_RAMBLING = "Avoid posing new questions or self-questioning and answering, and refrain from repeating words in your response." + SAY_SOMETHING = "Avoid meaningless answer such a random symbol or blanks." + ENCOURAGE = "If you cannot well understand the question, try to translate it into English, and translate the answer back to the language of the question." + NO_IDEA = ( + 'If the answer is not discernible, please respond with "Sorry. I have no idea" in the language of the question.' + ) + CLOZE_TEST = """The task is a fill-in-the-blank/cloze test.""" + NO_MEANINGLESS_SYMBOLS = "Meaningless symbols and ``` should not be included in your response." + ADAPT_NATIVE_LANGUAGE = "Please try to think like a person that speak the same language that the question used." + + +def _is_cloze(question): + return ("()" in question or "()" in question) and ("填" in question or "fill" in question or "cloze" in question) + + +# depreciated +def get_instructions(question): + # naive pre-retrieval rewrite + # cloze + if _is_cloze(question): + instructions = [ + INSTRUCTIONS.CLOZE_TEST, + ] + else: + instructions = [ + INSTRUCTIONS.ACCURATE_AND_SUCCINCT, + INSTRUCTIONS.NO_RAMBLING, + INSTRUCTIONS.NO_MEANINGLESS_SYMBOLS, + ] + return ["System: {}".format(_) for _ in instructions] + + +def preprocess_question(question): + if _is_cloze(question): + question = question.replace(" ", "").replace("(", "(").replace(")", ")") + # .replace("()", " <|blank|> ") + ret = "User: Please finish the following fill-in-the-blank question marked by $$$ at the beginning and end. Make sure all the () are filled.\n$$$\n{}\n$$$\nAssistant: ".format( + question + ) + else: + ret = "User: {}\nAssistant: 从上下文提供的信息中可以知道,".format(question) + return ret + + +class DocumentedContextRagPromptTemplate(PromptTemplate): + + def format(self, **kwargs) -> str: + # context = '\n'.join([clean_string(f"{_.page_content}".strip()) for i, _ in enumerate(kwargs["context"])]) + context = kwargs["context"] + question = kwargs["input"] + preprocessed_question = preprocess_question(question) + if "instructions" in self.template: + instructions = get_instructions(question) + prompt_str = self.template.format( + context=context, instructions="\n".join(instructions), input=preprocessed_question + ) + else: + prompt_str = self.template.format(context=context, input=preprocessed_question) + return prompt_str diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py new file mode 100644 index 000000000..83346d490 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +import faiss +from edgecraftrag.base import BaseComponent, CompType, IndexerType +from llama_index.core import StorageContext, VectorStoreIndex +from llama_index.vector_stores.faiss import FaissVectorStore +from pydantic import model_serializer + + +class VectorIndexer(BaseComponent, VectorStoreIndex): + + def __init__(self, embed_model, vector_type): + BaseComponent.__init__( + self, + comp_type=CompType.INDEXER, + comp_subtype=vector_type, + ) + self.model = embed_model + if not embed_model: + # Settings.embed_model should be set to None when embed_model is None to avoid 'no oneapi key' error + from llama_index.core import Settings + + Settings.embed_model = None + match vector_type: + case IndexerType.DEFAULT_VECTOR: + VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[]) + case 
IndexerType.FAISS_VECTOR: + if embed_model: + d = embed_model._model.request.outputs[0].get_partial_shape()[2].get_length() + else: + d = 128 + faiss_index = faiss.IndexFlatL2(d) + faiss_store = StorageContext.from_defaults(vector_store=FaissVectorStore(faiss_index=faiss_index)) + VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=faiss_store) + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "indexer_type": self.comp_subtype, "model": self.model} + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py new file mode 100644 index 000000000..72ee7f16e --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/model.py @@ -0,0 +1,74 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Optional + +from edgecraftrag.base import BaseComponent, CompType, ModelType +from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding +from llama_index.llms.openvino import OpenVINOLLM +from llama_index.postprocessor.openvino_rerank import OpenVINORerank +from pydantic import Field, model_serializer + + +class BaseModelComponent(BaseComponent): + + model_id: Optional[str] = Field(default="") + model_path: Optional[str] = Field(default="") + device: Optional[str] = Field(default="cpu") + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "type": self.comp_subtype, + "model_id": self.model_id, + "model_path": self.model_path, + "device": self.device, + } + return set + + +class OpenVINOEmbeddingModel(BaseModelComponent, OpenVINOEmbedding): + + def __init__(self, model_id, model_path, device): + OpenVINOEmbedding.create_and_save_openvino_model(model_id, model_path) + OpenVINOEmbedding.__init__(self, model_id_or_path=model_path, device=device) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.EMBEDDING + self.model_id = model_id + self.model_path = model_path + self.device = device + + +class OpenVINORerankModel(BaseModelComponent, OpenVINORerank): + + def __init__(self, model_id, model_path, device): + OpenVINORerank.create_and_save_openvino_model(model_id, model_path) + OpenVINORerank.__init__( + self, + model_id_or_path=model_path, + device=device, + ) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.RERANKER + self.model_id = model_id + self.model_path = model_path + self.device = device + + +class OpenVINOLLMModel(BaseModelComponent, OpenVINOLLM): + + def __init__(self, model_id, model_path, device): + OpenVINOLLM.__init__( + self, + model_id_or_path=model_path, + device_map=device, + ) + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.LLM + self.model_id = model_id + self.model_path = model_path + self.device = device diff --git a/EdgeCraftRAG/edgecraftrag/components/node_parser.py b/EdgeCraftRAG/edgecraftrag/components/node_parser.py new file mode 100644 index 000000000..cd50f4534 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/node_parser.py @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +from edgecraftrag.base import BaseComponent, CompType, NodeParserType +from llama_index.core.node_parser import HierarchicalNodeParser, SentenceSplitter, SentenceWindowNodeParser +from pydantic import model_serializer + + +class SimpleNodeParser(BaseComponent, SentenceSplitter): + + # Use super for 
SentenceSplitter since it's __init__ will cleanup + # BaseComponent fields + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.SIMPLE + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": self.chunk_size, + "chunk_overlap": self.chunk_overlap, + } + return set + + +class HierarchyNodeParser(BaseComponent, HierarchicalNodeParser): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.HIERARCHY + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": self.chunk_sizes, + "chunk_overlap": None, + } + return set + + +class SWindowNodeParser(BaseComponent, SentenceWindowNodeParser): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.SENTENCEWINDOW + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "docs": + return self.get_nodes_from_documents(v, show_progress=False) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + "chunk_size": None, + "chunk_overlap": None, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py new file mode 100644 index 000000000..4a2932e00 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -0,0 +1,160 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Callable, List, Optional + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType +from edgecraftrag.components.postprocessor import RerankProcessor +from llama_index.core.schema import Document, QueryBundle +from pydantic import BaseModel, Field, model_serializer + + +class PipelineStatus(BaseModel): + active: bool = False + + +class Pipeline(BaseComponent): + + node_parser: Optional[BaseComponent] = Field(default=None) + indexer: Optional[BaseComponent] = Field(default=None) + retriever: Optional[BaseComponent] = Field(default=None) + postprocessor: Optional[List[BaseComponent]] = Field(default=None) + generator: Optional[BaseComponent] = Field(default=None) + status: PipelineStatus = Field(default=PipelineStatus()) + run_pipeline_cb: Optional[Callable[..., Any]] = Field(default=None) + run_retriever_cb: Optional[Callable[..., Any]] = Field(default=None) + run_data_prepare_cb: Optional[Callable[..., Any]] = Field(default=None) + + def __init__( + self, + name, + ): + super().__init__(name=name, comp_type=CompType.PIPELINE) + if self.name == "" or self.name is None: + self.name = self.idx + self.run_pipeline_cb = run_test_generator + self.run_retriever_cb = run_test_retrieve + self.run_data_prepare_cb = run_simple_doc + self._node_changed = True + + # TODO: consider race condition + @property + def node_changed(self) -> bool: + return self._node_changed + + # TODO: 
update doc changes + # TODO: more operations needed, add, del, modify + def update_nodes(self, nodes): + print("updating nodes ", nodes) + if self.indexer is not None: + self.indexer.insert_nodes(nodes) + + # TODO: check more conditions + def check_active(self, nodelist): + if self._node_changed and nodelist is not None: + self.update_nodes(nodelist) + + # Implement abstract run function + # callback dispatcher + def run(self, **kwargs) -> Any: + print(kwargs) + if "cbtype" in kwargs: + if kwargs["cbtype"] == CallbackType.DATAPREP: + if "docs" in kwargs: + return self.run_data_prepare_cb(self, docs=kwargs["docs"]) + if kwargs["cbtype"] == CallbackType.RETRIEVE: + if "chat_request" in kwargs: + return self.run_retriever_cb(self, chat_request=kwargs["chat_request"]) + if kwargs["cbtype"] == CallbackType.PIPELINE: + if "chat_request" in kwargs: + return self.run_pipeline_cb(self, chat_request=kwargs["chat_request"]) + + def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=None, generator=None): + if node_parser is not None: + self.node_parser = node_parser + if indexer is not None: + self.indexer = indexer + if retriever is not None: + self.retriever = retriever + if postprocessor is not None: + self.postprocessor = postprocessor + if generator is not None: + self.generator = generator + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "name": self.name, + "comp_type": self.comp_type, + "node_parser": self.node_parser, + "indexer": self.indexer, + "retriever": self.retriever, + "postprocessor": self.postprocessor, + "generator": self.generator, + "status": self.status, + } + return set + + def model_existed(self, model_id: str) -> bool: + # judge if the given model is existed in a pipeline by model_id + if self.indexer: + if hasattr(self.indexer, "_embed_model") and self.indexer._embed_model.model_id == model_id: + return True + if hasattr(self.indexer, "_llm") and self.indexer._llm.model_id == model_id: + return True + if self.postprocessor: + for processor in self.postprocessor: + if hasattr(processor, "model_id") and processor.model_id == model_id: + return True + if self.generator: + llm = self.generator.llm + if llm() and llm().model_id == model_id: + return True + return False + + +# Test callback to retrieve nodes from query +def run_test_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + query = chat_request.messages + retri_res = pl.retriever.run(query=query) + query_bundle = QueryBundle(query) + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + return retri_res + + +def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: + n = pl.node_parser.run(docs=docs) + if pl.indexer is not None: + pl.indexer.insert_nodes(n) + print(pl.indexer._index_struct) + return n + + +def run_test_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + query = chat_request.messages + retri_res = pl.retriever.run(query=query) + query_bundle = QueryBundle(query) + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) 
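+ # Post-processing done; hand the retrieved nodes to the generator. inference_type selects the local OpenVINO path (generator.run) or the vLLM serving path (generator.run_vllm).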
+ if pl.generator is None: + return "No Generator Specified" + if pl.generator.inference_type == InferenceType.LOCAL: + answer = pl.generator.run(chat_request, retri_res) + elif pl.generator.inference_type == InferenceType.VLLM: + answer = pl.generator.run_vllm(chat_request, retri_res) + return answer diff --git a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py new file mode 100644 index 000000000..672826bdb --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py @@ -0,0 +1,64 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any + +from edgecraftrag.base import BaseComponent, CompType, PostProcessorType +from llama_index.core.postprocessor import MetadataReplacementPostProcessor +from pydantic import model_serializer + + +class RerankProcessor(BaseComponent): + + def __init__(self, rerank_model, top_n): + BaseComponent.__init__( + self, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.RERANKER, + ) + self.model = rerank_model + self.top_n = top_n + + def run(self, **kwargs) -> Any: + self.model.top_n = self.top_n + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.model.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": self.model, "top_n": self.top_n} + return set + + +class MetadataReplaceProcessor(BaseComponent, MetadataReplacementPostProcessor): + + def __init__(self, target_metadata_key="window"): + BaseComponent.__init__( + self, + target_metadata_key=target_metadata_key, + comp_type=CompType.POSTPROCESSOR, + comp_subtype=PostProcessorType.METADATAREPLACE, + ) + + def run(self, **kwargs) -> Any: + query_bundle = None + query_str = None + if "retri_res" in kwargs: + nodes = kwargs["retri_res"] + if "query_bundle" in kwargs: + query_bundle = kwargs["query_bundle"] + if "query_str" in kwargs: + query_str = kwargs["query_str"] + return self.postprocess_nodes(nodes, query_bundle=query_bundle, query_str=query_str) + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "postprocessor_type": self.comp_subtype, "model": None, "top_n": None} + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py new file mode 100644 index 000000000..cba251b2a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -0,0 +1,104 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, List, cast + +from edgecraftrag.base import BaseComponent, CompType, RetrieverType +from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever +from llama_index.core.retrievers import AutoMergingRetriever +from llama_index.core.schema import BaseNode +from llama_index.retrievers.bm25 import BM25Retriever +from pydantic import model_serializer + + +class VectorSimRetriever(BaseComponent, VectorIndexRetriever): + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.VECTORSIMILARITY, + ) + VectorIndexRetriever.__init__( + self, + index=indexer, + 
node_ids=list(indexer.index_struct.nodes_dict.values()), + callback_manager=indexer._callback_manager, + object_map=indexer._object_map, + **kwargs, + ) + # This might be a bug of llamaindex retriever. + # The node_ids will never be updated after the retriever's + # creation. However, the node_ids decides the available node + # ids to be retrieved which means the target nodes to be + # retrieved are freezed to the time of the retriever's creation. + self._node_ids = None + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + return self.retrieve(v) + + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "retriever_type": self.comp_subtype, + "retrieve_topk": self.similarity_top_k, + } + return set + + +class AutoMergeRetriever(BaseComponent, AutoMergingRetriever): + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.AUTOMERGE, + ) + self._index = indexer + self.topk = kwargs["similarity_top_k"] + + AutoMergingRetriever.__init__( + self, + vector_retriever=indexer.as_retriever(**kwargs), + storage_context=indexer._storage_context, + object_map=indexer._object_map, + callback_manager=indexer._callback_manager, + ) + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + # vector_retriever needs to be updated + self._vector_retriever = self._index.as_retriever(similarity_top_k=self.topk) + return self.retrieve(v) + + return None + + +class SimpleBM25Retriever(BaseComponent): + # The nodes parameter in BM25Retriever is not from index, + # nodes in BM25Retriever can not be updated through 'indexer.insert_nodes()', + # which means nodes should be passed to BM25Retriever after data preparation stage, not init stage + + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.BM25, + ) + self._docstore = indexer._docstore + self.topk = kwargs["similarity_top_k"] + + def run(self, **kwargs) -> Any: + for k, v in kwargs.items(): + if k == "query": + nodes = cast(List[BaseNode], list(self._docstore.docs.values())) + bm25_retr = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=self.topk) + return bm25_retr.retrieve(v) + + return None diff --git a/EdgeCraftRAG/edgecraftrag/context.py b/EdgeCraftRAG/edgecraftrag/context.py new file mode 100644 index 000000000..3555ce4be --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/context.py @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.controllers.compmgr import GeneratorMgr, IndexerMgr, NodeParserMgr, PostProcessorMgr, RetrieverMgr +from edgecraftrag.controllers.filemgr import FilelMgr +from edgecraftrag.controllers.modelmgr import ModelMgr +from edgecraftrag.controllers.nodemgr import NodeMgr +from edgecraftrag.controllers.pipelinemgr import PipelineMgr + + +class Context: + + def __init__(self): + self.plmgr = PipelineMgr() + self.nodemgr = NodeMgr() + self.npmgr = NodeParserMgr() + self.idxmgr = IndexerMgr() + self.rtvmgr = RetrieverMgr() + self.ppmgr = PostProcessorMgr() + self.modmgr = ModelMgr() + self.genmgr = GeneratorMgr() + self.filemgr = FilelMgr() + + def get_pipeline_mgr(self): + return self.plmgr + + def get_node_mgr(self): + return self.nodemgr + + def get_node_parser_mgr(self): + return self.npmgr + + def get_indexer_mgr(self): + return self.idxmgr + + def get_retriever_mgr(self): + return self.rtvmgr + + def 
get_postprocessor_mgr(self): + return self.ppmgr + + def get_model_mgr(self): + return self.modmgr + + def get_generator_mgr(self): + return self.genmgr + + def get_file_mgr(self): + return self.filemgr + + +ctx = Context() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/__init__.py b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py new file mode 100644 index 000000000..b8dd82ab7 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py @@ -0,0 +1,66 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType, NodeParserType + + +class NodeParserMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def search_parser(self, npin: NodeParserIn) -> BaseComponent: + for _, v in self.components.items(): + v_parser_type = v.comp_subtype + if v_parser_type == npin.parser_type: + if v_parser_type == NodeParserType.HIERARCHY and v.chunk_sizes == npin.chunk_sizes: + return v + elif v_parser_type == NodeParserType.SENTENCEWINDOW and v.window_size == npin.window_size: + return v + elif ( + v_parser_type == NodeParserType.SIMPLE + and v.chunk_size == npin.chunk_size + and v.chunk_overlap == npin.chunk_overlap + ): + return v + return None + + +class IndexerMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def search_indexer(self, indin: IndexerIn) -> BaseComponent: + for _, v in self.components.items(): + if v.comp_subtype == indin.indexer_type: + if ( + hasattr(v, "model") + and v.model + and indin.embedding_model + and ( + (v.model.model_id_or_path == indin.embedding_model.model_id) + or (v.model.model_id_or_path == indin.embedding_model.model_path) + ) + ): + return v + return None + + +class RetrieverMgr(BaseMgr): + + def __init__(self): + super().__init__() + + +class PostProcessorMgr(BaseMgr): + + def __init__(self): + super().__init__() + + +class GeneratorMgr(BaseMgr): + + def __init__(self): + super().__init__() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py new file mode 100644 index 000000000..0278f1f6a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py @@ -0,0 +1,83 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import os +from typing import Any, Callable, List, Optional + +from edgecraftrag.base import BaseMgr +from edgecraftrag.components.data import File +from llama_index.core.schema import Document + + +class FilelMgr(BaseMgr): + + def __init__(self): + super().__init__() + + def add_text(self, text: str): + file = File(file_name="text", content=text) + self.add(file) + return file.documents + + def add_files(self, docs: Any): + if not isinstance(docs, list): + docs = [docs] + + input_docs = [] + for doc in docs: + if not os.path.exists(doc): + continue + + if os.path.isfile(doc): + files = [doc] + elif os.path.isdir(doc): + files = [os.path.join(root, f) for root, _, files in os.walk(doc) for f in files] + else: + continue + + if not files: + continue + + for file_path in files: + file = File(file_path=file_path) + 
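+ # File parses its documents at construction time (convert_file_to_documents); register it with the manager so it can later be fetched or deleted by name/idx, and collect its documents for the caller.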
self.add(file) + input_docs.extend(file.documents) + + return input_docs + + def get_file_by_name_or_id(self, name: str): + for _, file in self.components.items(): + if file.name == name or file.idx == name: + return file + return None + + def get_files(self): + return [file for _, file in self.components.items()] + + def get_all_docs(self) -> List[Document]: + all_docs = [] + for _, file in self.components.items(): + all_docs.extend(file.documents) + return all_docs + + def get_docs_by_file(self, name) -> List[Document]: + file = self.get_file_by_name_or_id(name) + return file.documents if file else [] + + def del_file(self, name): + file = self.get_file_by_name_or_id(name) + if file: + self.remove(file.idx) + return True + else: + return False + + def update_file(self, name): + file = self.get_file_by_name_or_id(name) + if file: + self.remove(file.idx) + self.add_files(docs=name) + return True + else: + return False diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py new file mode 100644 index 000000000..73a77e48a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType +from edgecraftrag.components.model import OpenVINOEmbeddingModel, OpenVINOLLMModel, OpenVINORerankModel + + +class ModelMgr(BaseMgr): + + def __init__(self): + self._lock = asyncio.Lock() + super().__init__() + + def get_model_by_name(self, name: str): + for _, v in self.components.items(): + if v.model_id == name: + model_type = v.comp_subtype.value + model_info = { + "model_type": model_type, + "model_id": getattr(v, "model_id", "Unknown"), + } + if model_type == ModelType.LLM: + model_info["model_path"] = getattr(v, "model_name", "Unknown") + model_info["device"] = getattr(v, "device_map", "Unknown") + else: + model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown") + model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown")) + return model_info + return None + + def get_models(self): + model = {} + for k, v in self.components.items(): + # Supplement the information of the model + model_type = v.comp_subtype.value + model_info = { + "model_type": model_type, + "model_id": getattr(v, "model_id", "Unknown"), + } + if model_type == ModelType.LLM: + model_info["model_path"] = getattr(v, "model_name", "Unknown") + model_info["device"] = getattr(v, "device_map", "Unknown") + else: + model_info["model_path"] = getattr(v, "model_id_or_path", "Unknown") + model_info["device"] = getattr(v, "device", getattr(v, "_device", "Unknown")) + model[k] = model_info + return model + + def search_model(self, modelin: ModelIn) -> BaseComponent: + # Compare model_path and device to search model + for _, v in self.components.items(): + model_path = v.model_name if v.comp_subtype.value == "llm" else v.model_id_or_path + model_dev = ( + v.device_map + if v.comp_subtype.value == "llm" + else getattr(v, "device", getattr(v, "_device", "Unknown")) + ) + if model_path == modelin.model_path and model_dev == modelin.device: + return v + return None + + def del_model_by_name(self, name: str): + for key, v in self.components.items(): + if v and v.model_id == name: + self.remove(key) + return "Model deleted" + return "Model not found" + + @staticmethod + def load_model(model_para: 
ModelIn): + model = None + match model_para.model_type: + case ModelType.EMBEDDING: + model = OpenVINOEmbeddingModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + case ModelType.RERANKER: + model = OpenVINORerankModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + case ModelType.LLM: + model = OpenVINOLLMModel( + model_id=model_para.model_id, + model_path=model_para.model_path, + device=model_para.device, + ) + return model diff --git a/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py new file mode 100644 index 000000000..13a41117c --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/nodemgr.py @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import List + +from edgecraftrag.api_schema import IndexerIn, ModelIn, NodeParserIn +from edgecraftrag.base import BaseComponent, BaseMgr, CallbackType, ModelType +from llama_index.core.schema import BaseNode + + +class NodeMgr: + + def __init__(self): + self.nodes = {} + + # idx: index of node_parser + def add_nodes(self, np_idx, nodes): + if np_idx in self.nodes: + self.nodes[np_idx].append(nodes) + else: + self.nodes[np_idx] = nodes + + # TODO: to be implemented + def del_nodes(self, nodes): + pass + + def del_nodes_by_np_idx(self, np_idx): + del self.nodes[np_idx] + + def get_nodes(self, np_idx) -> List[BaseNode]: + if np_idx in self.nodes: + return self.nodes[np_idx] + else: + return [] diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py new file mode 100644 index 000000000..d0b8e0780 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -0,0 +1,79 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +from typing import Any, List + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import BaseMgr, CallbackType +from edgecraftrag.components.pipeline import Pipeline +from edgecraftrag.controllers.nodemgr import NodeMgr +from llama_index.core.schema import Document + + +class PipelineMgr(BaseMgr): + + def __init__(self): + self._active_pipeline = None + self._lock = asyncio.Lock() + super().__init__() + + def create_pipeline(self, name: str): + pl = Pipeline(name) + self.add(pl) + return pl + + def get_pipeline_by_name_or_id(self, name: str): + for _, pl in self.components.items(): + if pl.name == name or pl.idx == name: + return pl + return None + + def get_pipelines(self): + return [pl for _, pl in self.components.items()] + + def activate_pipeline(self, name: str, active: bool, nm: NodeMgr): + pl = self.get_pipeline_by_name_or_id(name) + nodelist = None + if pl is not None: + if not active: + pl.status.active = False + self._active_pipeline = None + return + if pl.node_changed: + nodelist = nm.get_nodes(pl.node_parser.idx) + pl.check_active(nodelist) + prevactive = self._active_pipeline + if prevactive: + prevactive.status.active = False + pl.status.active = True + self._active_pipeline = pl + + def get_active_pipeline(self) -> Pipeline: + return self._active_pipeline + + def notify_node_change(self): + for _, pl in self.components.items(): + pl.set_node_change() + + def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + out = None + if ap is not None: + out = ap.run(cbtype=CallbackType.PIPELINE, 
chat_request=chat_request) + return out + return -1 + + def run_retrieve(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + out = None + if ap is not None: + out = ap.run(cbtype=CallbackType.RETRIEVE, chat_request=chat_request) + return out + return -1 + + def run_data_prepare(self, docs: List[Document]) -> Any: + ap = self.get_active_pipeline() + if ap is not None: + return ap.run(cbtype=CallbackType.DATAPREP, docs=docs) + return -1 diff --git a/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt new file mode 100644 index 000000000..aa57e6059 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/prompt_template/default_prompt.txt @@ -0,0 +1,8 @@ +<|im_start|>System: You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge.<|im_end|> + +<|im_start|>{context}<|im_end|> + +<|im_start|>System: Pay attention to your formatting of response. If you need to reference content from context, try to keep the formatting.<|im_end|> +<|im_start|>System: Try to summarize from the context, do some reasoning before response, then response. Make sure your response is logically sound and self-consistent.<|im_end|> + +<|im_start|>{input} \ No newline at end of file diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt new file mode 100644 index 000000000..3756c732a --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -0,0 +1,16 @@ +docx2txt +faiss-cpu>=1.8.0.post1 +gradio>=4.44.1 +langchain-core==0.2.29 +llama-index>=0.11.0 +llama-index-embeddings-openvino>=0.4.0 +llama-index-llms-openai-like>=0.2.0 +llama-index-llms-openvino>=0.3.1 +llama-index-postprocessor-openvino-rerank>=0.3.0 +llama-index-retrievers-bm25>=0.3.0 +llama-index-vector-stores-faiss>=0.2.1 +loguru>=0.7.2 +omegaconf>=2.3.0 +opea-comps>=0.9 +py-cpuinfo>=9.0.0 +uvicorn>=0.30.6 diff --git a/EdgeCraftRAG/edgecraftrag/server.py b/EdgeCraftRAG/edgecraftrag/server.py new file mode 100644 index 000000000..705c3f07b --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/server.py @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +import uvicorn +from edgecraftrag.api.v1.chatqna import chatqna_app +from edgecraftrag.api.v1.data import data_app +from edgecraftrag.api.v1.model import model_app +from edgecraftrag.api.v1.pipeline import pipeline_app +from fastapi import FastAPI +from llama_index.core.settings import Settings + +app = FastAPI() + +sub_apps = [data_app, model_app, pipeline_app, chatqna_app] +for sub_app in sub_apps: + for route in sub_app.routes: + app.router.routes.append(route) + + +if __name__ == "__main__": + Settings.llm = None + + host = os.getenv("PIPELINE_SERVICE_HOST_IP", "0.0.0.0") + port = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) + uvicorn.run(app, host=host, port=port) diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json new file mode 100644 index 000000000..18895d6e5 --- /dev/null +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -0,0 +1,41 @@ +{ + "name": "rag_test_local_llm", + "node_parser": { + "chunk_size": 400, + "chunk_overlap": 48, + "parser_type": "simple" + }, + "indexer": { + "indexer_type": "faiss_vector", + "embedding_model": { + "model_id": "BAAI/bge-small-en-v1.5", + "model_path": "./models/bge_ov_embedding", + 
"device": "auto" + } + }, + "retriever": { + "retriever_type": "vectorsimilarity", + "retrieve_topk": 30 + }, + "postprocessor": [ + { + "processor_type": "reranker", + "top_n": 2, + "reranker_model": { + "model_id": "BAAI/bge-reranker-large", + "model_path": "./models/bge_ov_reranker", + "device": "auto" + } + } + ], + "generator": { + "model": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "model_path": "./models/qwen2-7b-instruct/INT4_compressed_weights", + "device": "cpu" + }, + "prompt_path": "./edgecraftrag/prompt_template/default_prompt.txt", + "inference_type": "local" + }, + "active": "True" +} diff --git a/EdgeCraftRAG/ui/docker/Dockerfile.ui b/EdgeCraftRAG/ui/docker/Dockerfile.ui new file mode 100644 index 000000000..d8e5eec8e --- /dev/null +++ b/EdgeCraftRAG/ui/docker/Dockerfile.ui @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY ./ui/gradio /home/user/ui +COPY ./edgecraftrag /home/user/edgecraftrag + +WORKDIR /home/user/edgecraftrag +RUN pip install --no-cache-dir -r requirements.txt + +WORKDIR /home/user/ui + +USER user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "ecragui.py"] \ No newline at end of file diff --git a/EdgeCraftRAG/ui/gradio/__init__.py b/EdgeCraftRAG/ui/gradio/__init__.py new file mode 100644 index 000000000..916f3a44b --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png new file mode 100644 index 000000000..527b9ad94 Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-ondark-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png new file mode 100644 index 000000000..707ddd251 Binary files /dev/null and b/EdgeCraftRAG/ui/gradio/assets/ai-logo-inline-onlight-3000.png differ diff --git a/EdgeCraftRAG/ui/gradio/config.py b/EdgeCraftRAG/ui/gradio/config.py new file mode 100644 index 000000000..477aba7c2 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/config.py @@ -0,0 +1,358 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +DEFAULT_SYSTEM_PROMPT = """\ +You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +If a question does not make any sense or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\ +""" + +DEFAULT_SYSTEM_PROMPT_CHINESE = """\ +你是一个乐于助人、尊重他人以及诚实可靠的助手。在安全的情况下,始终尽可能有帮助地回答。 您的回答不应包含任何有害、不道德、种族主义、性别歧视、有毒、危险或非法的内容。请确保您的回答在社会上是公正的和积极的。 +如果一个问题没有任何意义或与事实不符,请解释原因,而不是回答错误的问题。如果您不知道问题的答案,请不要分享虚假信息。另外,答案请使用中文。\ +""" + +DEFAULT_SYSTEM_PROMPT_JAPANESE = """\ +あなたは親切で、礼儀正しく、誠実なアシスタントです。 常に安全を保ちながら、できるだけ役立つように答えてください。 回答には、有害、非倫理的、人種差別的、性差別的、有毒、危険、または違法なコンテンツを含めてはいけません。 回答は社会的に偏見がなく、本質的に前向きなものであることを確認してください。 +質問が意味をなさない場合、または事実に一貫性がない場合は、正しくないことに答えるのではなく、その理由を説明してください。 質問の答えがわからない場合は、誤った情報を共有しないでください。\ +""" + +DEFAULT_RAG_PROMPT = """\ +You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\ +""" + +DEFAULT_RAG_PROMPT_CHINESE = """\ +基于以下已知信息,请简洁并专业地回答用户的问题。如果无法从中得到答案,请说 "根据已知信息无法回答该问题" 或 "没有提供足够的相关信息"。不允许在答案中添加编造成分。另外,答案请使用中文。\ +""" + + +def red_pijama_partial_text_processor(partial_text, new_text): + if new_text == "<": + return partial_text + + partial_text += new_text + return partial_text.split(":")[-1] + + +def llama_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("[INST]", "").replace("[/INST]", "") + partial_text += new_text + return partial_text + + +def chatglm_partial_text_processor(partial_text, new_text): + new_text = new_text.strip() + new_text = new_text.replace("[[训练时间]]", "2023年") + partial_text += new_text + return partial_text + + +def youri_partial_text_processor(partial_text, new_text): + new_text = new_text.replace("システム:", "") + partial_text += new_text + return partial_text + + +def internlm_partial_text_processor(partial_text, new_text): + partial_text += new_text + return partial_text.split("<|im_end|>")[0] + + +SUPPORTED_LLM_MODELS = { + "English": { + "tiny-llama-1b-chat": { + "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "gemma-2b-it": { + "model_id": "google/gemma-2b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "red-pajama-3b-chat": { + "model_id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "remote_code": False, + "start_message": "", + "history_template": "\n:{user}\n:{assistant}", + "stop_tokens": [29, 0], + "partial_text_processor": red_pijama_partial_text_processor, + "current_message_template": "\n:{user}\n:{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT }""" + + """ + : Question: {input} + Context: {context} + Answer: """, + }, + "gemma-7b-it": { + "model_id": "google/gemma-7b-it", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT + ", ", + "history_template": "user{user}model{assistant}", + "current_message_template": "user{user}model{assistant}", + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT},""" + + """user{input}context{context}model""", + }, + "llama-2-chat-7b": { + 
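+ # Note: this checkpoint is gated on Hugging Face; the Meta license must be accepted (and an access token provided) before it can be downloaded.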
"model_id": "meta-llama/Llama-2-7b-chat-hf", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f"""[INST]Human: <> {DEFAULT_RAG_PROMPT }<>""" + + """ + Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "mpt-7b-chat": { + "model_id": "mosaicml/mpt-7b-chat", + "remote_code": False, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT }<|im_end|>""" + + """ + <|im_start|>user + Question: {input} + Context: {context} + Answer: <|im_start|>assistant""", + }, + "mistral-7b": { + "model_id": "mistralai/Mistral-7B-v0.1", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + "zephyr-7b-beta": { + "model_id": "HuggingFaceH4/zephyr-7b-beta", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "notus-7b-v1": { + "model_id": "argilla/notus-7b-v1", + "remote_code": False, + "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n", + "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n", + "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}", + "rag_prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT }""" + + """ + <|user|> + Question: {input} + Context: {context} + Answer: + <|assistant|>""", + }, + "neural-chat-7b-v3-1": { + "model_id": "Intel/neural-chat-7b-v3-3", + "remote_code": False, + "start_message": f"[INST] <>\n{DEFAULT_SYSTEM_PROMPT }\n<>\n\n", + "history_template": "{user}[/INST]{assistant}[INST]", + "current_message_template": "{user} [/INST]{assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": llama_partial_text_processor, + "rag_prompt_template": f""" [INST] {DEFAULT_RAG_PROMPT } [/INST] """ + + """ + [INST] Question: {input} + Context: {context} + Answer: [/INST]""", + }, + }, + "Chinese": { + "qwen1.5-0.5b-chat": { + "model_id": "Qwen/Qwen1.5-0.5B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + }, + "qwen1.5-7b-chat": { + "model_id": "Qwen/Qwen1.5-7B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "summarization_prompt_template": """ + <|im_start|>user + 
问题: 总结下文内容,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "split_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇简短的摘要. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "combine_summary_template": """ + <|im_start|>user + 问题: 根据已知内容写一篇摘要,不少于{character_num}字. + 已知内容: {text} + 回答: <|im_end|><|im_start|>assistant""", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen-7b-chat": { + "model_id": "Qwen/Qwen-7B-Chat", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "qwen2-7b-instruct": { + "model_id": "Qwen/Qwen2-7B-Instruct", + "remote_code": True, + "start_message": f"<|im_start|>system\n {DEFAULT_SYSTEM_PROMPT_CHINESE }<|im_end|>", + "history_template": "<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}<|im_end|>", + "current_message_template": '"<|im_start|>user\n{user}<|im_start|>assistant\n{assistant}', + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "revision": "2abd8e5777bb4ce9c8ab4be7dbbd0fe4526db78d", + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + "chatglm3-6b": { + "model_id": "THUDM/chatglm3-6b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "baichuan2-7b-chat": { + "model_id": "baichuan-inc/Baichuan2-7B-Chat", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "tokenizer_kwargs": {"add_special_tokens": False}, + "stop_tokens": [0, 2], + "rag_prompt_template": f"""{DEFAULT_RAG_PROMPT_CHINESE }""" + + """ + 问题: {input} + 已知内容: {context} + 回答: + """, + }, + "minicpm-2b-dpo": { + "model_id": "openbmb/MiniCPM-2B-dpo-fp16", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2], + }, + "internlm2-chat-1.8b": { + "model_id": "internlm/internlm2-chat-1_8b", + "remote_code": True, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": [2, 92542], + "partial_text_processor": internlm_partial_text_processor, + }, + "qwen1.5-1.8b-chat": { + "model_id": "Qwen/Qwen1.5-1.8B-Chat", + "remote_code": False, + "start_message": DEFAULT_SYSTEM_PROMPT_CHINESE, + "stop_tokens": ["<|im_end|>", "<|endoftext|>"], + "rag_prompt_template": f"""<|im_start|>system + {DEFAULT_RAG_PROMPT_CHINESE }<|im_end|>""" + + """ + <|im_start|>user + 问题: {input} + 已知内容: {context} + 回答: <|im_end|><|im_start|>assistant""", + }, + }, + "Japanese": { + "youri-7b-chat": { + "model_id": "rinna/youri-7b-chat", + "remote_code": False, + "start_message": f"設定: {DEFAULT_SYSTEM_PROMPT_JAPANESE}\n", + "history_template": "ユーザー: 
{user}\nシステム: {assistant}\n", + "current_message_template": "ユーザー: {user}\nシステム: {assistant}", + "tokenizer_kwargs": {"add_special_tokens": False}, + "partial_text_processor": youri_partial_text_processor, + }, + }, +} + +SUPPORTED_EMBEDDING_MODELS = { + "English": { + "bge-small-en-v1.5": { + "model_id": "BAAI/bge-small-en-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + "bge-large-en-v1.5": { + "model_id": "BAAI/bge-large-en-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + }, + "Chinese": { + "bge-small-zh-v1.5": { + "model_id": "BAAI/bge-small-zh-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + "bge-large-zh-v1.5": { + "model_id": "bge-large-zh-v1.5", + "mean_pooling": False, + "normalize_embeddings": True, + }, + }, +} + + +SUPPORTED_RERANK_MODELS = { + "bge-reranker-large": {"model_id": "BAAI/bge-reranker-large"}, + "bge-reranker-base": {"model_id": "BAAI/bge-reranker-base"}, +} diff --git a/EdgeCraftRAG/ui/gradio/default.yaml b/EdgeCraftRAG/ui/gradio/default.yaml new file mode 100644 index 000000000..1421da8f4 --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/default.yaml @@ -0,0 +1,49 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Model language for LLM +model_language: "Chinese" +vector_db: "FAISS" +splitter_name: "RecursiveCharacter" +k_rerank: 5 +search_method: "similarity" +score_threshold: 0.5 +bm25_weight: 0 + +# Pipeline +name: "default" + +# Node parser +node_parser: "simple" +chunk_size: 192 +chunk_overlap: 48 + +# Indexer +indexer: "faiss_vector" + +# Retriever +retriever: "vectorsimilarity" +k_retrieval: 30 + +# Post Processor +postprocessor: "reranker" + +# Generator +generator: "local" +prompt_path: "./data/default_prompt.txt" + +# Models +embedding_model_id: "BAAI/bge-small-en-v1.5" +embedding_model_path: "./bge_ov_embedding" +# Device for embedding model inference +embedding_device: "AUTO" + +rerank_model_id: "BAAI/bge-reranker-large" +rerank_model_path: "./bge_ov_reranker" +# Device for reranking model inference +rerank_device: "AUTO" + +llm_model_id: "qwen2-7b-instruct" +llm_model_path: "./qwen2-7b-instruct/INT4_compressed_weights" +# Device for LLM model inference +llm_device: "AUTO" diff --git a/EdgeCraftRAG/ui/gradio/ecrag_client.py b/EdgeCraftRAG/ui/gradio/ecrag_client.py new file mode 100644 index 000000000..47b5f776d --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/ecrag_client.py @@ -0,0 +1,124 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import sys + +import requests + +sys.path.append("..") +import os + +from edgecraftrag import api_schema + +PIPELINE_SERVICE_HOST_IP = os.getenv("PIPELINE_SERVICE_HOST_IP", "127.0.0.1") +PIPELINE_SERVICE_PORT = int(os.getenv("PIPELINE_SERVICE_PORT", 16010)) +server_addr = f"http://{PIPELINE_SERVICE_HOST_IP}:{PIPELINE_SERVICE_PORT}" + + +def get_current_pipelines(): + res = requests.get(f"{server_addr}/v1/settings/pipelines", proxies={"http": None}) + pls = [] + for pl in res.json(): + if pl["status"]["active"]: + pls.append((pl["idx"], pl["name"] + " (active)")) + else: + pls.append((pl["idx"], pl["name"])) + return pls + + +def get_pipeline(name): + res = requests.get(f"{server_addr}/v1/settings/pipelines/{name}", proxies={"http": None}) + return res.json() + + +def create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + 
embedding_device, + rerank_id, + rerank_device, +): + req_dict = api_schema.PipelineCreateIn( + name=name, + active=active, + node_parser=api_schema.NodeParserIn( + parser_type=node_parser, chunk_size=chunk_size, chunk_overlap=chunk_overlap + ), + indexer=api_schema.IndexerIn( + indexer_type=indexer, + embedding_model=api_schema.ModelIn( + model_id=embedding_id, + # TODO: remove hardcoding + model_path="./bge_ov_embedding", + device=embedding_device, + ), + ), + retriever=api_schema.RetrieverIn(retriever_type=retriever, retriever_topk=vector_search_top_k), + postprocessor=[ + api_schema.PostProcessorIn( + processor_type=postprocessor[0], + reranker_model=api_schema.ModelIn( + model_id=rerank_id, + # TODO: remove hardcoding + model_path="./bge_ov_reranker", + device=rerank_device, + ), + ) + ], + generator=api_schema.GeneratorIn( + # TODO: remove hardcoding + prompt_path="./edgecraftrag/prompt_template/default_prompt.txt", + model=api_schema.ModelIn( + model_id=llm_id, + # TODO: remove hardcoding + model_path="./models/qwen2-7b-instruct/INT4_compressed_weights", + device=llm_device, + ), + ), + ) + # hard code only for test + print(req_dict) + res = requests.post(f"{server_addr}/v1/settings/pipelines", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def activate_pipeline(name): + active_dict = {"active": "True"} + res = requests.patch(f"{server_addr}/v1/settings/pipelines/{name}", json=active_dict, proxies={"http": None}) + status = False + restext = f"Activate pipeline {name} failed." + if res.ok: + status = True + restext = f"Activate pipeline {name} successfully." + return restext, status + + +def create_vectordb(docs, spliter, vector_db): + req_dict = api_schema.FilesIn(local_paths=docs) + res = requests.post(f"{server_addr}/v1/data/files", json=req_dict.dict(), proxies={"http": None}) + return res.text + + +def get_files(): + res = requests.get(f"{server_addr}/v1/data/files", proxies={"http": None}) + files = [] + for file in res.json(): + files.append((file["file_name"], file["file_id"])) + return files + + +def delete_file(file_name_or_id): + res = requests.delete(f"{server_addr}/v1/data/files/{file_name_or_id}", proxies={"http": None}) + return res.text diff --git a/EdgeCraftRAG/ui/gradio/ecragui.py b/EdgeCraftRAG/ui/gradio/ecragui.py new file mode 100644 index 000000000..3c198bf2a --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/ecragui.py @@ -0,0 +1,983 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import json +import platform +import re +from datetime import datetime +from pathlib import Path + +import cpuinfo +import distro # if running Python 3.8 or above +import ecrag_client as cli +import gradio as gr +import httpx + +# Creation of the ModelLoader instance and loading models remain the same +import platform_config as pconf +import psutil +import requests +from loguru import logger +from omegaconf import OmegaConf +from platform_config import get_available_devices, get_available_weights, get_local_available_models + +pipeline_df = [] + +import os + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "127.0.0.1") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 16011)) +UI_SERVICE_HOST_IP = os.getenv("UI_SERVICE_HOST_IP", "0.0.0.0") +UI_SERVICE_PORT = int(os.getenv("UI_SERVICE_PORT", 8084)) + + +def get_llm_model_dir(llm_model_id, weights_compression): + model_dirs = { + "fp16_model_dir": Path(llm_model_id) / "FP16", + "int8_model_dir": Path(llm_model_id) / "INT8_compressed_weights", + 
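+ # Assumed on-disk layout: one subfolder per precision (FP16, INT8_compressed_weights, INT4_compressed_weights) under the directory named after the model id.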
"int4_model_dir": Path(llm_model_id) / "INT4_compressed_weights", + } + + if weights_compression == "INT4": + model_dir = model_dirs["int4_model_dir"] + elif weights_compression == "INT8": + model_dir = model_dirs["int8_model_dir"] + else: + model_dir = model_dirs["fp16_model_dir"] + + if not model_dir.exists(): + raise FileNotFoundError(f"The model directory {model_dir} does not exist.") + elif not model_dir.is_dir(): + raise NotADirectoryError(f"The path {model_dir} is not a directory.") + + return model_dir + + +def get_system_status(): + cpu_usage = psutil.cpu_percent(interval=1) + memory_info = psutil.virtual_memory() + memory_usage = memory_info.percent + memory_total_gb = memory_info.total / (1024**3) + memory_used_gb = memory_info.used / (1024**3) + # uptime_seconds = time.time() - psutil.boot_time() + # uptime_hours, uptime_minutes = divmod(uptime_seconds // 60, 60) + disk_usage = psutil.disk_usage("/").percent + # net_io = psutil.net_io_counters() + os_info = platform.uname() + kernel_version = os_info.release + processor = cpuinfo.get_cpu_info()["brand_raw"] + dist_name = distro.name(pretty=True) + + now = datetime.now() + current_time_str = now.strftime("%Y-%m-%d %H:%M") + + status = ( + f"{current_time_str} \t" + f"CPU Usage: {cpu_usage}% \t" + f"Memory Usage: {memory_usage}% {memory_used_gb:.2f}GB / {memory_total_gb:.2f}GB \t" + # f"System Uptime: {int(uptime_hours)} hours, {int(uptime_minutes)} minutes \t" + f"Disk Usage: {disk_usage}% \t" + # f"Bytes Sent: {net_io.bytes_sent}\n" + # f"Bytes Received: {net_io.bytes_recv}\n" + f"Kernel: {kernel_version} \t" + f"Processor: {processor} \t" + f"OS: {dist_name} \n" + ) + return status + + +def build_demo(cfg, args): + + def load_chatbot_models( + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + req_dict = { + "llm_id": llm_id, + "llm_device": llm_device, + "llm_weights": llm_weights, + "embedding_id": embedding_id, + "embedding_device": embedding_device, + "rerank_id": rerank_id, + "rerank_device": rerank_device, + } + # hard code only for test + worker_addr = "http://127.0.0.1:8084" + print(req_dict) + result = requests.post(f"{worker_addr}/load", json=req_dict, proxies={"http": None}) + return result.text + + def user(message, history): + """Callback function for updating user messages in interface on submit button click. + + Params: + message: current message + history: conversation history + Returns: + None + """ + # Append the user's message to the conversation history + return "", history + [[message, ""]] + + async def bot( + history, + temperature, + top_p, + top_k, + repetition_penalty, + hide_full_prompt, + do_rag, + docs, + spliter_name, + vector_db, + chunk_size, + chunk_overlap, + vector_search_top_k, + vector_search_top_n, + run_rerank, + search_method, + score_threshold, + ): + """Callback function for running chatbot on submit button click. + + Params: + history: conversation history + temperature: parameter for control the level of creativity in AI-generated text. + By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse. + top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability. + top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability. 
+ repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text. + conversation_id: unique conversation identifier. + """ + # req_dict = { + # "history": history, + # "temperature": temperature, + # "top_p": top_p, + # "top_k": top_k, + # "repetition_penalty": repetition_penalty, + # "hide_full_prompt": hide_full_prompt, + # "do_rag": do_rag, + # "docs": docs, + # "spliter_name": spliter_name, + # "vector_db": vector_db, + # "chunk_size": chunk_size, + # "chunk_overlap": chunk_overlap, + # "vector_search_top_k": vector_search_top_k, + # "vector_search_top_n": vector_search_top_n, + # "run_rerank": run_rerank, + # "search_method": search_method, + # "score_threshold": score_threshold, + # "streaming": True + # } + print(history) + new_req = {"messages": history[-1][0]} + server_addr = f"http://{MEGA_SERVICE_HOST_IP}:{MEGA_SERVICE_PORT}" + + # Async for streaming response + partial_text = "" + async with httpx.AsyncClient() as client: + async with client.stream("POST", f"{server_addr}/v1/chatqna", json=new_req, timeout=None) as response: + partial_text = "" + async for chunk in response.aiter_lines(): + new_text = chunk + if new_text.startswith("data"): + new_text = re.sub(r"\r\n", "", chunk.split("data: ")[-1]) + new_text = json.loads(chunk)["choices"][0]["message"]["content"] + partial_text = partial_text + new_text + history[-1][1] = partial_text + yield history + + avail_llms = get_local_available_models("llm") + avail_embed_models = get_local_available_models("embed") + avail_rerank_models = get_local_available_models("rerank") + avail_devices = get_available_devices() + avail_weights_compression = get_available_weights() + avail_node_parsers = pconf.get_available_node_parsers() + avail_indexers = pconf.get_available_indexers() + avail_retrievers = pconf.get_available_retrievers() + avail_postprocessors = pconf.get_available_postprocessors() + avail_generators = pconf.get_available_generators() + + css = """ + .feedback textarea {font-size: 18px; !important } + #blude_border {border: 1px solid #0000FF} + #white_border {border: 2px solid #FFFFFF} + .test textarea {color: E0E0FF; border: 1px solid #0000FF} + .disclaimer {font-variant-caps: all-small-caps} + """ + + with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: + gr.HTML( + """ + + + + + + + +
+            <div>
+              <img alt="Sample Image" />
+              <h1>Edge Craft RAG based Q&A Chatbot</h1>
+              <h3>Powered by Intel NEXC Edge AI solutions</h3>
+            </div>
+ + + + """ + ) + _ = gr.Textbox( + label="System Status", + value=get_system_status, + max_lines=1, + every=1, + info="", + elem_id="white_border", + ) + + def get_pipeline_df(): + global pipeline_df + pipeline_df = cli.get_current_pipelines() + return pipeline_df + + # ------------------- + # RAG Settings Layout + # ------------------- + with gr.Tab("RAG Settings"): + with gr.Row(): + with gr.Column(scale=2): + u_pipelines = gr.Dataframe( + headers=["ID", "Name"], + column_widths=[70, 30], + value=get_pipeline_df, + label="Pipelines", + show_label=True, + interactive=False, + every=5, + ) + + u_rag_pipeline_status = gr.Textbox(label="Status", value="", interactive=False) + + with gr.Column(scale=3): + with gr.Accordion("Pipeline Configuration"): + with gr.Row(): + rag_create_pipeline = gr.Button("Create Pipeline") + rag_activate_pipeline = gr.Button("Activate Pipeline") + rag_remove_pipeline = gr.Button("Remove Pipeline") + + with gr.Column(variant="panel"): + u_pipeline_name = gr.Textbox( + label="Name", + value=cfg.name, + interactive=True, + ) + u_active = gr.Checkbox( + value=True, + label="Activated", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Node Parser"): + u_node_parser = gr.Dropdown( + choices=avail_node_parsers, + label="Node Parser", + value=cfg.node_parser, + info="Select a parser to split documents.", + multiselect=False, + interactive=True, + ) + u_chunk_size = gr.Slider( + label="Chunk size", + value=cfg.chunk_size, + minimum=100, + maximum=2000, + step=50, + interactive=True, + info="Size of sentence chunk", + ) + + u_chunk_overlap = gr.Slider( + label="Chunk overlap", + value=cfg.chunk_overlap, + minimum=0, + maximum=400, + step=1, + interactive=True, + info=("Overlap between 2 chunks"), + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Indexer"): + u_indexer = gr.Dropdown( + choices=avail_indexers, + label="Indexer", + value=cfg.indexer, + info="Select an indexer for indexing content of the documents.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("Embedding Model Configuration"): + u_embed_model_id = gr.Dropdown( + choices=avail_embed_models, + value=cfg.embedding_model_id, + label="Embedding Model", + # info="Select a Embedding Model", + multiselect=False, + allow_custom_value=True, + ) + + u_embed_device = gr.Dropdown( + choices=avail_devices, + value=cfg.embedding_device, + label="Embedding run device", + # info="Run embedding model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Retriever"): + u_retriever = gr.Dropdown( + choices=avail_retrievers, + value=cfg.retriever, + label="Retriever", + info="Select a retriever for retrieving context.", + multiselect=False, + interactive=True, + ) + u_vector_search_top_k = gr.Slider( + 1, + 50, + value=cfg.k_retrieval, + step=1, + label="Search top k", + info="Number of searching results, must >= Rerank top n", + interactive=True, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Postprocessor"): + u_postprocessor = gr.Dropdown( + choices=avail_postprocessors, + value=cfg.postprocessor, + label="Postprocessor", + info="Select postprocessors for post-processing of the context.", + multiselect=True, + interactive=True, + ) + + with gr.Accordion("Rerank Model Configuration", open=True): + u_rerank_model_id = gr.Dropdown( + choices=avail_rerank_models, + value=cfg.rerank_model_id, + label="Rerank Model", + # info="Select a Rerank Model", + multiselect=False, + allow_custom_value=True, + ) + + 
u_rerank_device = gr.Dropdown( + choices=avail_devices, + value=cfg.rerank_device, + label="Rerank run device", + # info="Run rerank model on which device?", + multiselect=False, + ) + + with gr.Column(variant="panel"): + with gr.Accordion("Generator"): + u_generator = gr.Dropdown( + choices=avail_generators, + value=cfg.generator, + label="Generator", + info="Select a generator for AI inference.", + multiselect=False, + interactive=True, + ) + + with gr.Accordion("LLM Configuration", open=True): + u_llm_model_id = gr.Dropdown( + choices=avail_llms, + value=cfg.llm_model_id, + label="Large Language Model", + # info="Select a Large Language Model", + multiselect=False, + allow_custom_value=True, + ) + + u_llm_device = gr.Dropdown( + choices=avail_devices, + value=cfg.llm_device, + label="LLM run device", + # info="Run LLM on which device?", + multiselect=False, + ) + + u_llm_weights = gr.Radio( + avail_weights_compression, + label="Weights", + info="weights compression", + ) + + # ------------------- + # RAG Settings Events + # ------------------- + # Event handlers + def show_pipeline_detail(evt: gr.SelectData): + # get selected pipeline id + # Dataframe: {'headers': '', 'data': [[x00, x01], [x10, x11]} + # SelectData.index: [i, j] + print(u_pipelines.value["data"]) + print(evt.index) + # always use pipeline id for indexing + selected_id = pipeline_df[evt.index[0]][0] + pl = cli.get_pipeline(selected_id) + # TODO: change to json fomart + # pl["postprocessor"][0]["processor_type"] + # pl["postprocessor"]["model"]["model_id"], pl["postprocessor"]["model"]["device"] + return ( + pl["name"], + pl["status"]["active"], + pl["node_parser"]["parser_type"], + pl["node_parser"]["chunk_size"], + pl["node_parser"]["chunk_overlap"], + pl["indexer"]["indexer_type"], + pl["retriever"]["retriever_type"], + pl["retriever"]["retrieve_topk"], + pl["generator"]["generator_type"], + pl["generator"]["model"]["model_id"], + pl["generator"]["model"]["device"], + "", + pl["indexer"]["model"]["model_id"], + pl["indexer"]["model"]["device"], + ) + + def modify_create_pipeline_button(): + return "Create Pipeline" + + def modify_update_pipeline_button(): + return "Update Pipeline" + + def create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ): + res = cli.create_update_pipeline( + name, + active, + node_parser, + chunk_size, + chunk_overlap, + indexer, + retriever, + vector_search_top_k, + postprocessor, + generator, + llm_id, + llm_device, + llm_weights, + embedding_id, + embedding_device, + rerank_id, + rerank_device, + ) + return res, get_pipeline_df() + + # Events + u_pipelines.select( + show_pipeline_detail, + inputs=None, + outputs=[ + u_pipeline_name, + u_active, + # node parser + u_node_parser, + u_chunk_size, + u_chunk_overlap, + # indexer + u_indexer, + # retriever + u_retriever, + u_vector_search_top_k, + # postprocessor + # u_postprocessor, + # generator + u_generator, + # models + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + # u_rerank_model_id, + # u_rerank_device + ], + ) + + u_pipeline_name.input(modify_create_pipeline_button, inputs=None, outputs=rag_create_pipeline) + + # Create pipeline button will change to update pipeline button if any + # of the listed fields changed + gr.on( + triggers=[ + u_active.input, + # node parser + 
u_node_parser.input, + u_chunk_size.input, + u_chunk_overlap.input, + # indexer + u_indexer.input, + # retriever + u_retriever.input, + u_vector_search_top_k.input, + # postprocessor + u_postprocessor.input, + # generator + u_generator.input, + # models + u_llm_model_id.input, + u_llm_device.input, + u_llm_weights.input, + u_embed_model_id.input, + u_embed_device.input, + u_rerank_model_id.input, + u_rerank_device.input, + ], + fn=modify_update_pipeline_button, + inputs=None, + outputs=rag_create_pipeline, + ) + + rag_create_pipeline.click( + create_update_pipeline, + inputs=[ + u_pipeline_name, + u_active, + u_node_parser, + u_chunk_size, + u_chunk_overlap, + u_indexer, + u_retriever, + u_vector_search_top_k, + u_postprocessor, + u_generator, + u_llm_model_id, + u_llm_device, + u_llm_weights, + u_embed_model_id, + u_embed_device, + u_rerank_model_id, + u_rerank_device, + ], + outputs=[u_rag_pipeline_status, u_pipelines], + queue=False, + ) + + rag_activate_pipeline.click( + cli.activate_pipeline, + inputs=[u_pipeline_name], + outputs=[u_rag_pipeline_status, u_active], + queue=False, + ) + + # -------------- + # Chatbot Layout + # -------------- + def get_files(): + return cli.get_files() + + def create_vectordb(docs, spliter, vector_db): + res = cli.create_vectordb(docs, spliter, vector_db) + return gr.update(value=get_files()), res + + global u_files_selected_row + u_files_selected_row = None + + def select_file(data, evt: gr.SelectData): + if not evt.selected or len(evt.index) == 0: + return "No file selected" + global u_files_selected_row + row_index = evt.index[0] + u_files_selected_row = data.iloc[row_index] + file_name, file_id = u_files_selected_row + return f"File Name: {file_name}\nFile ID: {file_id}" + + def deselect_file(): + global u_files_selected_row + u_files_selected_row = None + return gr.update(value=get_files()), "Selection cleared" + + def delete_file(): + global u_files_selected_row + if u_files_selected_row is None: + res = "Please select a file first." 
+ else: + file_name, file_id = u_files_selected_row + u_files_selected_row = None + res = cli.delete_file(file_id) + return gr.update(value=get_files()), res + + with gr.Tab("Chatbot"): + with gr.Row(): + with gr.Column(scale=1): + docs = gr.File( + label="Step 1: Load text files", + file_count="multiple", + file_types=[ + ".csv", + ".doc", + ".docx", + ".enex", + ".epub", + ".html", + ".md", + ".odt", + ".pdf", + ".ppt", + ".pptx", + ".txt", + ], + ) + retriever_argument = gr.Accordion("Vector Store Configuration", open=False) + with retriever_argument: + spliter = gr.Dropdown( + ["Character", "RecursiveCharacter", "Markdown", "Chinese"], + value=cfg.splitter_name, + label="Text Spliter", + info="Method used to split the documents", + multiselect=False, + ) + + vector_db = gr.Dropdown( + ["FAISS", "Chroma"], + value=cfg.vector_db, + label="Vector Stores", + info="Stores embedded data and performs vector search.", + multiselect=False, + ) + load_docs = gr.Button("Upload files") + + u_files_status = gr.Textbox(label="File Processing Status", value="", interactive=False) + u_files = gr.Dataframe( + headers=["Loaded File Name", "File ID"], + value=get_files, + label="Loaded Files", + show_label=False, + interactive=False, + every=5, + ) + + with gr.Accordion("Delete File", open=False): + selected_files = gr.Textbox(label="Click file to select", value="", interactive=False) + with gr.Row(): + with gr.Column(): + delete_button = gr.Button("Delete Selected File") + with gr.Column(): + deselect_button = gr.Button("Clear Selection") + + do_rag = gr.Checkbox( + value=True, + label="RAG is ON", + interactive=True, + info="Whether to do RAG for generation", + ) + with gr.Accordion("Generation Configuration", open=False): + with gr.Row(): + with gr.Column(): + with gr.Row(): + temperature = gr.Slider( + label="Temperature", + value=0.1, + minimum=0.0, + maximum=1.0, + step=0.1, + interactive=True, + info="Higher values produce more diverse outputs", + ) + with gr.Column(): + with gr.Row(): + top_p = gr.Slider( + label="Top-p (nucleus sampling)", + value=1.0, + minimum=0.0, + maximum=1, + step=0.01, + interactive=True, + info=( + "Sample from the smallest possible set of tokens whose cumulative probability " + "exceeds top_p. Set to 1 to disable and sample from all tokens." 
+ ), + ) + with gr.Column(): + with gr.Row(): + top_k = gr.Slider( + label="Top-k", + value=50, + minimum=0.0, + maximum=200, + step=1, + interactive=True, + info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.", + ) + with gr.Column(): + with gr.Row(): + repetition_penalty = gr.Slider( + label="Repetition Penalty", + value=1.1, + minimum=1.0, + maximum=2.0, + step=0.1, + interactive=True, + info="Penalize repetition — 1.0 to disable.", + ) + with gr.Column(scale=4): + chatbot = gr.Chatbot( + height=600, + label="Step 2: Input Query", + show_copy_button=True, + ) + with gr.Row(): + with gr.Column(): + msg = gr.Textbox( + label="QA Message Box", + placeholder="Chat Message Box", + show_label=False, + container=False, + ) + with gr.Column(): + with gr.Row(): + submit = gr.Button("Submit") + stop = gr.Button("Stop") + clear = gr.Button("Clear") + retriever_argument = gr.Accordion("Retriever Configuration", open=True) + with retriever_argument: + with gr.Row(): + with gr.Row(): + do_rerank = gr.Checkbox( + value=True, + label="Rerank searching result", + interactive=True, + ) + hide_context = gr.Checkbox( + value=True, + label="Hide searching result in prompt", + interactive=True, + ) + with gr.Row(): + search_method = gr.Dropdown( + ["similarity_score_threshold", "similarity", "mmr"], + value=cfg.search_method, + label="Searching Method", + info="Method used to search vector store", + multiselect=False, + interactive=True, + ) + with gr.Row(): + score_threshold = gr.Slider( + 0.01, + 0.99, + value=cfg.score_threshold, + step=0.01, + label="Similarity Threshold", + info="Only working for 'similarity score threshold' method", + interactive=True, + ) + with gr.Row(): + vector_rerank_top_n = gr.Slider( + 1, + 10, + value=cfg.k_rerank, + step=1, + label="Rerank top n", + info="Number of rerank results", + interactive=True, + ) + load_docs.click( + create_vectordb, + inputs=[ + docs, + spliter, + vector_db, + ], + outputs=[u_files, u_files_status], + queue=True, + ) + # TODO: Need to de-select the dataframe, + # otherwise every time the dataframe is updated, a select event is triggered + u_files.select(select_file, inputs=[u_files], outputs=selected_files, queue=True) + + delete_button.click( + delete_file, + outputs=[u_files, u_files_status], + queue=True, + ) + deselect_button.click( + deselect_file, + outputs=[u_files, selected_files], + queue=True, + ) + + submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + submit_click_event = submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then( + bot, + [ + chatbot, + temperature, + top_p, + top_k, + repetition_penalty, + hide_context, + do_rag, + docs, + spliter, + vector_db, + u_chunk_size, + u_chunk_overlap, + u_vector_search_top_k, + vector_rerank_top_n, + do_rerank, + search_method, + score_threshold, + ], + chatbot, + queue=True, + ) + # stop.click( + # fn=request_cancel, + # inputs=None, + # outputs=None, + # cancels=[submit_event, submit_click_event], + # queue=False, + # ) + clear.click(lambda: None, None, chatbot, queue=False) + return demo + + +def main(): + # Create the parser + parser = argparse.ArgumentParser(description="Load Embedding and LLM Models with 
OpenVino.") + # Add the arguments + parser.add_argument("--prompt_template", type=str, required=False, help="User specific template") + # parser.add_argument("--server_name", type=str, default="0.0.0.0") + # parser.add_argument("--server_port", type=int, default=8082) + parser.add_argument("--config", type=str, default="./default.yaml", help="configuration file path") + parser.add_argument("--share", action="store_true", help="share model") + parser.add_argument("--debug", action="store_true", help="enable debugging") + + # Execute the parse_args() method to collect command line arguments + args = parser.parse_args() + logger.info(args) + cfg = OmegaConf.load(args.config) + init_cfg_(cfg) + logger.info(cfg) + + demo = build_demo(cfg, args) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # if you have any issue to launch on your platform, you can pass share=True to launch method: + # demo.launch(share=True) + # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ + # demo.launch(share=True) + demo.queue().launch( + server_name=UI_SERVICE_HOST_IP, server_port=UI_SERVICE_PORT, share=args.share, allowed_paths=["."] + ) + + # %% + # please run this cell for stopping gradio interface + demo.close() + + +def init_cfg_(cfg): + if "name" not in cfg: + cfg.name = "default" + if "embedding_device" not in cfg: + cfg.embedding_device = "CPU" + if "rerank_device" not in cfg: + cfg.rerank_device = "CPU" + if "llm_device" not in cfg: + cfg.llm_device = "CPU" + if "model_language" not in cfg: + cfg.model_language = "Chinese" + if "vector_db" not in cfg: + cfg.vector_db = "FAISS" + if "splitter_name" not in cfg: + cfg.splitter_name = "RecursiveCharacter" # or "Chinese" + if "search_method" not in cfg: + cfg.search_method = "similarity" + if "score_threshold" not in cfg: + cfg.score_threshold = 0.5 + + +if __name__ == "__main__": + main() diff --git a/EdgeCraftRAG/ui/gradio/platform_config.py b/EdgeCraftRAG/ui/gradio/platform_config.py new file mode 100644 index 000000000..852409c1c --- /dev/null +++ b/EdgeCraftRAG/ui/gradio/platform_config.py @@ -0,0 +1,114 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +from enum import Enum + +import openvino.runtime as ov +from config import SUPPORTED_EMBEDDING_MODELS, SUPPORTED_LLM_MODELS, SUPPORTED_RERANK_MODELS + +sys.path.append("..") +from edgecraftrag.base import GeneratorType, IndexerType, NodeParserType, PostProcessorType, RetrieverType + + +def _get_llm_model_ids(supported_models, model_language=None): + if model_language is None: + model_ids = [model_id for model_id, _ in supported_models.items()] + return model_ids + + if model_language not in supported_models: + print("Invalid model language! Please choose from the available options.") + return None + + # Create a list of model IDs based on the selected language + llm_model_ids = [ + model_id + for model_id, model_config in supported_models[model_language].items() + if model_config.get("rag_prompt_template") or model_config.get("normalize_embeddings") + ] + + return llm_model_ids + + +def _list_subdirectories(parent_directory): + """List all subdirectories under the given parent directory using os.listdir. + + Parameters: + parent_directory (str): The path to the parent directory from which to list subdirectories. + + Returns: + list: A list of subdirectory names found in the parent directory. 
+ """ + # Get a list of all entries in the parent directory + entries = os.listdir(parent_directory) + + # Filter out the entries to only keep directories + subdirectories = [entry for entry in entries if os.path.isdir(os.path.join(parent_directory, entry))] + + return sorted(subdirectories) + + +def _get_available_models(model_ids, local_dirs): + """Filters and sorts model IDs based on their presence in the local directories. + + Parameters: + model_ids (list): A list of model IDs to check. + local_dirs (list): A list of local directory names to check against. + + Returns: + list: A sorted list of available model IDs. + """ + # Filter model_ids for those that are present in local directories + return sorted([model_id for model_id in model_ids if model_id in local_dirs]) + + +def get_local_available_models(model_type: str, local_path: str = "./"): + local_dirs = _list_subdirectories(local_path) + if model_type == "llm": + model_ids = _get_llm_model_ids(SUPPORTED_LLM_MODELS, "Chinese") + elif model_type == "embed": + model_ids = _get_llm_model_ids(SUPPORTED_EMBEDDING_MODELS, "Chinese") + elif model_type == "rerank": + model_ids = _get_llm_model_ids(SUPPORTED_RERANK_MODELS) + else: + print("Unknown model type") + avail_models = _get_available_models(model_ids, local_dirs) + return avail_models + + +def get_available_devices(): + core = ov.Core() + avail_devices = core.available_devices + ["AUTO"] + if "NPU" in avail_devices: + avail_devices.remove("NPU") + return avail_devices + + +def get_available_weights(): + avail_weights_compression = ["FP16", "INT8", "INT4"] + return avail_weights_compression + + +def get_enum_values(c: Enum): + return [v.value for k, v in vars(c).items() if not callable(v) and not k.startswith("__") and not k.startswith("_")] + + +def get_available_node_parsers(): + return get_enum_values(NodeParserType) + + +def get_available_indexers(): + return get_enum_values(IndexerType) + + +def get_available_retrievers(): + return get_enum_values(RetrieverType) + + +def get_available_postprocessors(): + return get_enum_values(PostProcessorType) + + +def get_available_generators(): + return get_enum_values(GeneratorType) diff --git a/FaqGen/docker_compose/intel/cpu/xeon/README.md b/FaqGen/docker_compose/intel/cpu/xeon/README.md index 04fea0f85..c512621b0 100644 --- a/FaqGen/docker_compose/intel/cpu/xeon/README.md +++ b/FaqGen/docker_compose/intel/cpu/xeon/README.md @@ -114,9 +114,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" \ + -F "stream=false" ``` Following the validation of all aforementioned microservices, we are now prepared to construct a mega-service. 
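For reference, the updated multipart `/v1/faqgen` call shown in the Xeon and Gaudi READMEs can also be issued from Python. This is a minimal sketch, assuming the default MegaService port 8888 from the compose files and a `host_ip` placeholder for the machine running the FaqGen backend; the form fields mirror the curl `-F` options above.

```python
# Minimal sketch of the updated FaqGen MegaService request (multipart/form-data).
# Assumes the service is reachable at http://<host_ip>:8888 as in the README examples.
import requests

host_ip = "localhost"  # placeholder: replace with the host running the FaqGen backend

# Passing (None, value) tuples via `files` makes requests encode plain
# multipart/form-data fields, matching curl's -F "messages=...",
# -F "max_tokens=32", and -F "stream=false".
form = {
    "messages": (
        None,
        "Text Embeddings Inference (TEI) is a toolkit for deploying and serving "
        "open source text embeddings and sequence classification models.",
    ),
    "max_tokens": (None, "32"),
    "stream": (None, "false"),
}

resp = requests.post(f"http://{host_ip}:8888/v1/faqgen", files=form, timeout=120)
resp.raise_for_status()
print(resp.text)
```

With `stream` set to `false`, the full generated text comes back in a single response body, which is what the rewritten `validate_megaservice` functions in the test scripts grep for.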
diff --git a/FaqGen/docker_compose/intel/hpu/gaudi/README.md b/FaqGen/docker_compose/intel/hpu/gaudi/README.md index acdded9c2..548a94e16 100644 --- a/FaqGen/docker_compose/intel/hpu/gaudi/README.md +++ b/FaqGen/docker_compose/intel/hpu/gaudi/README.md @@ -28,7 +28,7 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op ```bash git clone https://github.com/opea-project/GenAIExamples -cd GenAIExamples/FaqGen/docker/ +cd GenAIExamples/FaqGen/ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . ``` @@ -37,7 +37,7 @@ docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_pro Construct the frontend Docker image using the command below: ```bash -cd GenAIExamples/FaqGen/ +cd GenAIExamples/FaqGen/ui docker build -t opea/faqgen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` @@ -115,9 +115,11 @@ docker compose up -d 3. MegaService ```bash - curl http://${host_ip}:8888/v1/faqgen -H "Content-Type: application/json" -d '{ - "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." - }' + curl http://${host_ip}:8888/v1/faqgen \ + -H "Content-Type: multipart/form-data" \ + -F "messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." \ + -F "max_tokens=32" \ + -F "stream=false" ``` ## 🚀 Launch the UI diff --git a/FaqGen/tests/test_compose_on_gaudi.sh b/FaqGen/tests/test_compose_on_gaudi.sh index a58339780..161c1e2a7 100644 --- a/FaqGen/tests/test_compose_on_gaudi.sh +++ b/FaqGen/tests/test_compose_on_gaudi.sh @@ -101,13 +101,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-gaudi-backend-server" \ - '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-gaudi-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." 
+ else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +169,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/tests/test_compose_on_xeon.sh b/FaqGen/tests/test_compose_on_xeon.sh index c6265e02d..e9ed4bf1e 100755 --- a/FaqGen/tests/test_compose_on_xeon.sh +++ b/FaqGen/tests/test_compose_on_xeon.sh @@ -101,13 +101,30 @@ function validate_microservices() { } function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/faqgen" \ - "Text Embeddings Inference" \ - "mega-faqgen" \ - "faqgen-xeon-backend-server" \ - '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' + local SERVICE_NAME="mega-faqgen" + local DOCKER_NAME="faqgen-xeon-backend-server" + local EXPECTED_RESULT="Embeddings" + local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + local URL="${ip_address}:8888/v1/faqgen" + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s } function validate_frontend() { @@ -152,7 +169,7 @@ function main() { validate_microservices validate_megaservice - validate_frontend + # validate_frontend stop_docker echo y | docker system prune diff --git a/FaqGen/ui/svelte/.env b/FaqGen/ui/svelte/.env index bfdca1c9a..4d0880c76 100644 --- a/FaqGen/ui/svelte/.env +++ b/FaqGen/ui/svelte/.env @@ -1 +1 @@ -DOC_BASE_URL = 'http://backend_address:8888/v1/faqgen' +FAQ_BASE_URL = 'http://backend_address:8888/v1/faqgen' diff --git a/FaqGen/ui/svelte/src/lib/doc.svelte b/FaqGen/ui/svelte/src/lib/doc.svelte index bae896ba3..f9ea33584 100644 --- a/FaqGen/ui/svelte/src/lib/doc.svelte +++ b/FaqGen/ui/svelte/src/lib/doc.svelte @@ -38,8 +38,8 @@ } else { currentIdx = index; if ( - (currentIdx === 1 && message !== "") || - (currentIdx === 2 && $kb_id !== "") + (currentIdx === 2 && message !== "") || + (currentIdx === 1 && $kb_id !== "") ) { formModal = true; } else { @@ -49,10 +49,10 @@ } function panelExchange() { - if (currentIdx === 2) { + if (currentIdx === 1) { kb_id.set(""); dispatch("clearMsg", { status: true }); - } else if (currentIdx === 1) { + } else if (currentIdx === 2) { message = ""; dispatch("clearMsg", { status: true }); } @@ -152,7 +152,7 @@ type="submit" data-testid="sum-click" class="xl:my-12 inline-flex items-center px-5 py-2.5 text-sm font-medium text-center text-white bg-blue-700 mt-2 focus:ring-4 focus:ring-blue-200 dark:focus:ring-blue-900 hover:bg-blue-800" - on:click={() => generateFaq()} + on:click={() => generateFaq()} > Generate FAQs @@ -165,11 +165,11 @@ /> {#if currentIdx === 1}

- The current content will be cleared. + The currently uploaded file will be cleared.

{:else if currentIdx === 2}

- The currently uploaded file will be cleared. + The current content will be cleared.

{/if} diff --git a/FaqGen/ui/svelte/src/lib/dropFile.svelte b/FaqGen/ui/svelte/src/lib/dropFile.svelte index ef52ca1d0..fcc972c54 100644 --- a/FaqGen/ui/svelte/src/lib/dropFile.svelte +++ b/FaqGen/ui/svelte/src/lib/dropFile.svelte @@ -15,30 +15,36 @@ -->