Commit

Merge branch 'main' into upgrade_tgi_gaudi
lvliang-intel authored Nov 8, 2024
2 parents a67f250 + 9c3023a commit c4fa56d
Showing 39 changed files with 752 additions and 694 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/_example-workflow.yml
@@ -77,6 +77,10 @@ jobs:
git clone https://github.com/vllm-project/vllm.git
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
cd vllm-fork && git rev-parse HEAD && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
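This new step mirrors the existing vLLM clone: the HabanaAI vllm-fork is fetched only when an example's compose file builds a `vllm-hpu:` image. A standalone sketch of the same guard (the compose file path here is a placeholder for illustration):

```bash
#!/bin/bash
# Clone the Gaudi-specific vLLM fork only when the compose file defines a vllm-hpu image.
docker_compose_yml="docker_image_build/build.yaml"  # placeholder path
if [[ $(grep -c "vllm-hpu:" "${docker_compose_yml}") != 0 ]]; then
  git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
  cd vllm-fork && git rev-parse HEAD && cd ../
fi
```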
32 changes: 32 additions & 0 deletions .github/workflows/check-online-doc-build.yml
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check Online Document Building
permissions: {}

on:
pull_request:
branches: [main]

jobs:
build:
runs-on: ubuntu-latest
steps:

- name: Checkout
uses: actions/checkout@v4
with:
path: GenAIExamples

- name: Checkout docs
uses: actions/checkout@v4
with:
repository: opea-project/docs
path: docs

- name: Build Online Document
shell: bash
run: |
echo "build online doc"
cd docs
bash scripts/build.sh
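The job only needs the two checkouts and the docs build script, so it can be reproduced locally before opening a PR; a sketch, assuming `scripts/build.sh` is the build entry point in the opea-project/docs repository exactly as the workflow invokes it:

```bash
# Reproduce the online-doc build locally: check out both repos side by side, then build.
git clone https://github.com/opea-project/GenAIExamples.git GenAIExamples
git clone https://github.com/opea-project/docs.git docs
cd docs
bash scripts/build.sh
```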
2 changes: 1 addition & 1 deletion .github/workflows/nightly-docker-build-publish.yml
@@ -5,7 +5,7 @@ name: Nightly build/publish latest docker images

on:
schedule:
- cron: "30 1 * * *"
- cron: "30 13 * * *" # UTC time
workflow_dispatch:

env:
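GitHub Actions evaluates `schedule` expressions in UTC, which is what the new comment calls out; the five cron fields read as follows:

```bash
# minute  hour  day-of-month  month  day-of-week
#   30     13        *          *         *
# => fires once per day at 13:30 UTC (previously 01:30 UTC)
```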
4 changes: 2 additions & 2 deletions .github/workflows/pr-path-detection.yml
@@ -61,11 +61,11 @@ jobs:
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
# echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
echo $url_line
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
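Commenting out the two `echo` statements keeps the log quiet without changing the link check itself. A standalone sketch of that check run against a single Markdown file (the file name is a placeholder):

```bash
# Extract every external http(s) link from a Markdown file and verify it resolves.
changed_file="README.md"  # placeholder
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
for url_line in $url_lines; do
  url=$(echo "$url_line" | cut -d '(' -f2 | cut -d ')' -f1 | sed 's/\.git$//')
  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
  if [ "$response" != "200" ]; then echo "broken link: $url (HTTP $response)"; fi
done
```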
5 changes: 4 additions & 1 deletion .github/workflows/scripts/get_test_matrix.sh
@@ -9,12 +9,15 @@ set -e
changed_files=$changed_files
test_mode=$test_mode
run_matrix="{\"include\":["
hardware_list="xeon gaudi" # current support hardware list

examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
ls -l
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
echo "Test supported hardware list = ${hardware_list}"

run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
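The hardcoded `hardware_list="xeon gaudi"` is replaced by deriving the list from the test script names, so adding a `test_compose*_on_<hardware>.sh` file is enough to bring new hardware into the matrix. A sketch of the extraction on its own, assuming a `tests/` directory that follows that naming convention:

```bash
cd tests
# e.g. test_compose_on_xeon.sh, test_compose_vllm_on_gaudi.sh  ->  "gaudi xeon"
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" \
  | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}' | sort -u)
echo "Test supported hardware list = ${hardware_list}"
```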
10 changes: 5 additions & 5 deletions ChatQnA/docker_compose/intel/cpu/xeon/README.md
@@ -48,13 +48,13 @@ docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```

In following cases, you could build docker image from source by yourself.
NB: You should build the Docker image from source yourself if:

- Failed to download the docker image.
- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
- You can't download the docker image.
- You want to use a specific version of Docker image.

- If you want to use a specific version of Docker image.

Please refer to 'Build Docker Images' in below.
Please refer to ['Build Docker Images'](#🚀-build-docker-images) below.

## QuickStart: 3.Consume the ChatQnA Service

Empty file modified: ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh (mode 100644 → 100755)
17 changes: 2 additions & 15 deletions ChatQnA/docker_compose/intel/hpu/gaudi/README.md
@@ -26,7 +26,7 @@ To set up environment variables for deploying ChatQnA services, follow these ste
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -227,7 +227,7 @@ For users in China who are unable to download models directly from Huggingface,
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,vllm-ray-service,guardrails
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -257,12 +257,6 @@ If use vllm for llm backend.
docker compose -f compose_vllm.yaml up -d
```

If use vllm-on-ray for llm backend.

```bash
docker compose -f compose_vllm_ray.yaml up -d
```

If you want to enable guardrails microservice in the pipeline, please follow the below command instead:

```bash
@@ -351,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```
```bash
#vLLM-on-Ray Service
curl http://${host_ip}:8006/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
5. MegaService
```bash
12 changes: 2 additions & 10 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
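Swapping the Gaudi-specific TEI image for the CPU build removes the Habana runtime settings; the service still publishes host port 8090 per the `ports` mapping. A quick smoke test of the embedding endpoint, assuming the port mapping from this compose file:

```bash
# Request an embedding from the TEI service; it returns a JSON array of float vectors.
curl http://${host_ip}:8090/embed \
  -H "Content-Type: application/json" \
  -d '{"inputs": "What is Deep Learning?"}'
```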
12 changes: 2 additions & 10 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
@@ -65,25 +65,17 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
16 changes: 4 additions & 12 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -86,7 +78,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
ports:
- "8007:80"
@@ -104,7 +96,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
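The `command:` for vllm-service now carries only the server arguments rather than a `bash -c` wrapper, which implies the renamed vllm-hpu image launches the OpenAI-compatible API server from its own entrypoint. Once the stack is up, the service can be exercised directly; a sketch, assuming the `8007:80` mapping from this compose file:

```bash
# Query the vLLM OpenAI-compatible chat endpoint exposed by vllm-gaudi-server.
curl http://${host_ip}:8007/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```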
