Commit

Merge branch 'main' into upgrade_tgi_gaudi
lvliang-intel authored Nov 8, 2024
2 parents a67f250 + 9c3023a commit c4fa56d
Showing 39 changed files with 752 additions and 694 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/_example-workflow.yml
@@ -77,6 +77,10 @@ jobs:
git clone https://github.com/vllm-project/vllm.git
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
cd vllm-fork && git rev-parse HEAD && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
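This new step mirrors the existing vLLM clone: the HabanaAI vllm-fork is fetched only when an example's compose file builds a `vllm-hpu:` image. A standalone sketch of the same guard (the compose file path here is a placeholder for illustration):

```bash
#!/bin/bash
# Clone the Gaudi-specific vLLM fork only when the compose file defines a vllm-hpu image.
docker_compose_yml="docker_image_build/build.yaml"  # placeholder path
if [[ $(grep -c "vllm-hpu:" "${docker_compose_yml}") != 0 ]]; then
  git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
  cd vllm-fork && git rev-parse HEAD && cd ../
fi
```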
32 changes: 32 additions & 0 deletions .github/workflows/check-online-doc-build.yml
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check Online Document Building
permissions: {}

on:
pull_request:
branches: [main]

jobs:
build:
runs-on: ubuntu-latest
steps:

- name: Checkout
uses: actions/checkout@v4
with:
path: GenAIExamples

- name: Checkout docs
uses: actions/checkout@v4
with:
repository: opea-project/docs
path: docs

- name: Build Online Document
shell: bash
run: |
echo "build online doc"
cd docs
bash scripts/build.sh
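The job only needs the two checkouts and the docs build script, so it can be reproduced locally before opening a PR; a sketch, assuming `scripts/build.sh` is the build entry point in the opea-project/docs repository exactly as the workflow invokes it:

```bash
# Reproduce the online-doc build locally: check out both repos side by side, then build.
git clone https://github.com/opea-project/GenAIExamples.git GenAIExamples
git clone https://github.com/opea-project/docs.git docs
cd docs
bash scripts/build.sh
```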
2 changes: 1 addition & 1 deletion .github/workflows/nightly-docker-build-publish.yml
@@ -5,7 +5,7 @@ name: Nightly build/publish latest docker images

on:
schedule:
- cron: "30 1 * * *"
- cron: "30 13 * * *" # UTC time
workflow_dispatch:

env:
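GitHub Actions evaluates `schedule` expressions in UTC, which is what the new comment calls out; the five cron fields read as follows:

```bash
# minute  hour  day-of-month  month  day-of-week
#   30     13        *          *         *
# => fires once per day at 13:30 UTC (previously 01:30 UTC)
```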
4 changes: 2 additions & 2 deletions .github/workflows/pr-path-detection.yml
@@ -61,11 +61,11 @@ jobs:
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
# echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
echo $url_line
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
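Commenting out the two `echo` statements keeps the log quiet without changing the link check itself. A standalone sketch of that check run against a single Markdown file (the file name is a placeholder):

```bash
# Extract every external http(s) link from a Markdown file and verify it resolves.
changed_file="README.md"  # placeholder
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
for url_line in $url_lines; do
  url=$(echo "$url_line" | cut -d '(' -f2 | cut -d ')' -f1 | sed 's/\.git$//')
  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
  if [ "$response" != "200" ]; then echo "broken link: $url (HTTP $response)"; fi
done
```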
5 changes: 4 additions & 1 deletion .github/workflows/scripts/get_test_matrix.sh
@@ -9,12 +9,15 @@ set -e
changed_files=$changed_files
test_mode=$test_mode
run_matrix="{\"include\":["
hardware_list="xeon gaudi" # current support hardware list

examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
ls -l
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
echo "Test supported hardware list = ${hardware_list}"

run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
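The hardcoded `hardware_list="xeon gaudi"` is replaced by deriving the list from the test script names, so adding a `test_compose*_on_<hardware>.sh` file is enough to bring new hardware into the matrix. A sketch of the extraction on its own, assuming a `tests/` directory that follows that naming convention:

```bash
cd tests
# e.g. test_compose_on_xeon.sh, test_compose_vllm_on_gaudi.sh  ->  "gaudi xeon"
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" \
  | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}' | sort -u)
echo "Test supported hardware list = ${hardware_list}"
```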
10 changes: 5 additions & 5 deletions ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -48,13 +48,13 @@ docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```

In following cases, you could build docker image from source by yourself.
NB: You should build the Docker image from source yourself if:

- Failed to download the docker image.
- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
- You can't download the docker image.
- You want to use a specific version of Docker image.

- If you want to use a specific version of Docker image.

Please refer to 'Build Docker Images' in below.
Please refer to ['Build Docker Images'](#🚀-build-docker-images) below.

## QuickStart: 3.Consume the ChatQnA Service

Empty file modified: ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh (mode 100644 → 100755)
17 changes: 2 additions & 15 deletions ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -26,7 +26,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -227,7 +227,7 @@ For users in China who are unable to download models directly from Huggingface,
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -257,12 +257,6 @@ If use vllm for llm backend.
docker compose -f compose_vllm.yaml up -d
```

If use vllm-on-ray for llm backend.

```bash
docker compose -f compose_vllm_ray.yaml up -d
```

If you want to enable guardrails microservice in the pipeline, please follow the below command instead:

```bash
@@ -351,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```
```bash
#vLLM-on-Ray Service
curl http://${host_ip}:8006/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
5. MegaService
```bash
12 changes: 2 additions & 10 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
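Swapping the Gaudi-specific TEI image for the CPU build removes the Habana runtime settings; the service still publishes host port 8090 per the `ports` mapping. A quick smoke test of the embedding endpoint, assuming the port mapping from this compose file:

```bash
# Request an embedding from the TEI service; it returns a JSON array of float vectors.
curl http://${host_ip}:8090/embed \
  -H "Content-Type: application/json" \
  -d '{"inputs": "What is Deep Learning?"}'
```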
12 changes: 2 additions & 10 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
@@ -65,25 +65,17 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
16 changes: 4 additions & 12 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -86,7 +78,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
ports:
- "8007:80"
@@ -104,7 +96,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
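The `command:` for vllm-service now carries only the server arguments rather than a `bash -c` wrapper, which implies the renamed vllm-hpu image launches the OpenAI-compatible API server from its own entrypoint. Once the stack is up, the service can be exercised directly; a sketch, assuming the `8007:80` mapping from this compose file:

```bash
# Query the vLLM OpenAI-compatible chat endpoint exposed by vllm-gaudi-server.
curl http://${host_ip}:8007/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```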
