diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/README.md b/AudioQnA/docker_compose/amd/gpu/rocm/README.md
new file mode 100644
index 000000000..3ae8cc8a3
--- /dev/null
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/README.md
@@ -0,0 +1,170 @@
+# Build Mega Service of AudioQnA on AMD ROCm GPU
+
+This document outlines the deployment process for an AudioQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice
+pipeline on a server with an AMD ROCm GPU platform.
+
+## 🚀 Build Docker images
+
+### 1. Install GenAIComps from Source Code
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+```
+
+### 2. Build ASR Image
+
+```bash
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
+
+docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+```
+
+### 3. Build LLM Image
+
+```bash
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+```
+
+Note: For the ROCm compose example, the AMD-optimized TGI image published by Hugging Face is used for the TGI service: `ghcr.io/huggingface/text-generation-inference:2.3.1-rocm` (https://github.com/huggingface/text-generation-inference).
+
+### 4. Build TTS Image
+
+```bash
+docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
+
+docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
+```
+
+### 5. Build MegaService Docker Image
+
+To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/AudioQnA/
+docker build --no-cache -t opea/audioqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+Then run the command `docker images`; you should see the following images ready:
+
+1. `opea/whisper:latest`
+2. `opea/asr:latest`
+3. `opea/llm-tgi:latest`
+4. `opea/speecht5:latest`
+5. `opea/tts:latest`
+6. `opea/audioqna:latest`
+
+## 🚀 Set the environment variables
+
+Before starting the services with `docker compose`, make sure the following environment variables are set.
+
+```bash
+export host_ip= # export host_ip=$(hostname -I | awk '{print $1}')
+export HUGGINGFACEHUB_API_TOKEN=
+
+export TGI_LLM_ENDPOINT=http://$host_ip:3006
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+
+export ASR_ENDPOINT=http://$host_ip:7066
+export TTS_ENDPOINT=http://$host_ip:7055
+
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export ASR_SERVICE_HOST_IP=${host_ip}
+export TTS_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+
+export ASR_SERVICE_PORT=3001
+export TTS_SERVICE_PORT=3002
+export LLM_SERVICE_PORT=3007
+```
+
+Alternatively, source the provided `set_env.sh` file to set up the environment variables (`. ./set_env.sh`).
+
+Note: Please replace `host_ip` with your external IP address; do not use `localhost`.
+
+Note: To limit access to a subset of GPUs, pass each device individually using one or more `--device /dev/dri/renderD<node>` arguments, where `<node>` is the card index, starting from 128
+(https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus).
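+
+For reference, you can check which card and render nodes exist on the host before choosing the device mappings used below; the exact node numbers vary between systems (a minimal check, assuming a standard Linux DRI setup):
+
+```bash
+# List the DRI device nodes exposed by the host.
+# GPUs typically appear as a cardN node plus a matching renderD node (renderD128, renderD129, ...).
+ls -l /dev/dri/
+```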
+
+Example of setting isolation for 1 GPU
+
+      - /dev/dri/card0:/dev/dri/card0
+      - /dev/dri/renderD128:/dev/dri/renderD128
+
+Example of setting isolation for 2 GPUs
+
+      - /dev/dri/card0:/dev/dri/card0
+      - /dev/dri/renderD128:/dev/dri/renderD128
+      - /dev/dri/card1:/dev/dri/card1
+      - /dev/dri/renderD129:/dev/dri/renderD129
+
+Please find more information about accessing and restricting AMD GPUs at this link: (https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/docker.html#docker-restrict-gpus)
+
+## 🚀 Start the MegaService
+
+```bash
+cd GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm/
+docker compose up -d
+```
+
+In the following cases, you could build the Docker images from source yourself:
+
+- The Docker image download failed.
+- You want to use a specific version of a Docker image.
+
+Please refer to the 'Build Docker images' section above.
+
+## 🚀 Consume the AudioQnA Service
+
+Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
+base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
+to the response, decode the base64 string and save it as a .wav file.
+
+```bash
+curl http://${host_ip}:3008/v1/audioqna \
+  -X POST \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
+  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
+```
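+
+To exercise the same endpoint with your own recording, you can build the request payload from a local file first. The sketch below assumes GNU coreutils `base64` (for the `-w 0` option) and a hypothetical file name `sample.wav`:
+
+```bash
+# Encode a local wav file as a single-line base64 string and send it to the megaservice.
+B64_AUDIO=$(base64 -w 0 sample.wav)
+curl http://${host_ip}:3008/v1/audioqna \
+  -X POST \
+  -d "{\"audio\": \"${B64_AUDIO}\", \"max_tokens\":64}" \
+  -H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > response.wav
+```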
+
+## 🚀 Test MicroServices
+
+```bash
+# whisper service
+curl http://${host_ip}:7066/v1/asr \
+  -X POST \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+  -H 'Content-Type: application/json'
+
+# asr microservice
+curl http://${host_ip}:3001/v1/audio/transcriptions \
+  -X POST \
+  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+  -H 'Content-Type: application/json'
+
+# tgi service
+curl http://${host_ip}:3006/generate \
+  -X POST \
+  -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+  -H 'Content-Type: application/json'
+
+# llm microservice
+curl http://${host_ip}:3007/v1/chat/completions \
+  -X POST \
+  -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
+  -H 'Content-Type: application/json'
+
+# speecht5 service
+curl http://${host_ip}:7055/v1/tts \
+  -X POST \
+  -d '{"text": "Who are you?"}' \
+  -H 'Content-Type: application/json'
+
+# tts microservice
+curl http://${host_ip}:3002/v1/audio/speech \
+  -X POST \
+  -d '{"text": "Who are you?"}' \
+  -H 'Content-Type: application/json'
+```
diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
new file mode 100644
index 000000000..651fd5464
--- /dev/null
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml
@@ -0,0 +1,110 @@
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  asr:
+    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
+    container_name: asr-service
+    ports:
+      - "3001:9099"
+    ipc: host
+    environment:
+      ASR_ENDPOINT: ${ASR_ENDPOINT}
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - "7055:7055"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+  tts:
+    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
+    container_name: tts-service
+    ports:
+      - "3002:9088"
+    ipc: host
+    environment:
+      TTS_ENDPOINT: ${TTS_ENDPOINT}
+  tgi-service:
+    image: ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+    container_name: tgi-service
+    ports:
+      - "3006:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
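+    # NOTE: the two DRI mappings below assume the target GPU is exposed as card1/renderD136 on the host.
+    # Adjust the card/renderD entries to match your system (see `ls /dev/dri/` and the GPU isolation note in the README).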
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri/card1:/dev/dri/card1
+      - /dev/dri/renderD136:/dev/dri/renderD136
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+    command: --model-id ${LLM_MODEL_ID}
+    cap_add:
+      - SYS_PTRACE
+    group_add:
+      - video
+    security_opt:
+      - seccomp:unconfined
+    ipc: host
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server
+    depends_on:
+      - tgi-service
+    ports:
+      - "3007:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  audioqna-backend-server:
+    image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
+    container_name: audioqna-xeon-backend-server
+    depends_on:
+      - asr
+      - llm
+      - tts
+    ports:
+      - "3008:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
+      - ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
+      - TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
+      - TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
+    ipc: host
+    restart: always
+
+networks:
+  default:
+    driver: bridge
diff --git a/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh b/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh
new file mode 100644
index 000000000..8765b702b
--- /dev/null
+++ b/AudioQnA/docker_compose/amd/gpu/rocm/set_env.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+# Replace with your external IP address, e.g. export host_ip=$(hostname -I | awk '{print $1}')
+export host_ip="192.165.1.21"
+export HUGGINGFACEHUB_API_TOKEN=${YOUR_HUGGINGFACEHUB_API_TOKEN}
+
+export TGI_LLM_ENDPOINT=http://$host_ip:3006
+export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+
+export ASR_ENDPOINT=http://$host_ip:7066
+export TTS_ENDPOINT=http://$host_ip:7055
+
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export ASR_SERVICE_HOST_IP=${host_ip}
+export TTS_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+
+export ASR_SERVICE_PORT=3001
+export TTS_SERVICE_PORT=3002
+export LLM_SERVICE_PORT=3007
diff --git a/AudioQnA/tests/test_compose_on_rocm.sh b/AudioQnA/tests/test_compose_on_rocm.sh
new file mode 100644
index 000000000..86a148472
--- /dev/null
+++ b/AudioQnA/tests/test_compose_on_rocm.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# Copyright (C) 2024 Advanced Micro Devices, Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+set -ex
+IMAGE_REPO=${IMAGE_REPO:-"opea"}
+IMAGE_TAG=${IMAGE_TAG:-"latest"}
+echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
+echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
+export REGISTRY=${IMAGE_REPO}
+export TAG=${IMAGE_TAG}
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+export PATH="${HOME}/miniconda3/bin:$PATH"
+
+function build_docker_images() {
+    cd $WORKPATH/docker_image_build
+    git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+
+    echo "Build all the images with --no-cache, check docker_image_build.log for details..."
+    service_list="audioqna whisper asr llm-tgi speecht5 tts"
+    docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
+
+    echo "docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm"
+    docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
+
+    docker images && sleep 1s
+}
+
+function start_services() {
+    cd $WORKPATH/docker_compose/amd/gpu/rocm/
+    export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export TGI_LLM_ENDPOINT=http://$ip_address:3006
+    export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
+
+    export ASR_ENDPOINT=http://$ip_address:7066
+    export TTS_ENDPOINT=http://$ip_address:7055
+
+    export MEGA_SERVICE_HOST_IP=${ip_address}
+    export ASR_SERVICE_HOST_IP=${ip_address}
+    export TTS_SERVICE_HOST_IP=${ip_address}
+    export LLM_SERVICE_HOST_IP=${ip_address}
+
+    export ASR_SERVICE_PORT=3001
+    export TTS_SERVICE_PORT=3002
+    export LLM_SERVICE_PORT=3007
+
+    # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
+
+    # Start Docker Containers
+    docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
+    n=0
+    until [[ "$n" -ge 100 ]]; do
+        docker logs tgi-service > $LOG_PATH/tgi_service_start.log
+        if grep -q Connected $LOG_PATH/tgi_service_start.log; then
+            break
+        fi
+        sleep 5s
+        n=$((n+1))
+    done
+}
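+
+# validate_megaservice posts the sample base64-encoded wav from the README to the AudioQnA endpoint.
+# A healthy pipeline returns a long base64-encoded wav response; long runs of "A" (zero bytes in
+# base64) are expected in such audio data, so the "AAA" substring check is used as a lightweight
+# sanity check. On failure, the logs of all services are collected into LOG_PATH before exiting.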
+function validate_megaservice() {
+    result=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json')
+    echo $result
+    if [[ $result == *"AAA"* ]]; then
+        echo "Result correct."
+    else
+        docker logs whisper-service > $LOG_PATH/whisper-service.log
+        docker logs asr-service > $LOG_PATH/asr-service.log
+        docker logs speecht5-service > $LOG_PATH/speecht5-service.log
+        docker logs tts-service > $LOG_PATH/tts-service.log
+        docker logs tgi-service > $LOG_PATH/tgi-service.log
+        docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
+        docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
+
+        echo "Result wrong."
+        exit 1
+    fi
+}
+
+#function validate_frontend() {
+# Frontend tests are currently disabled
+#    cd $WORKPATH/ui/svelte
+#    local conda_env_name="OPEA_e2e"
+#    export PATH=${HOME}/miniforge3/bin/:$PATH
+##    conda remove -n ${conda_env_name} --all -y
+##    conda create -n ${conda_env_name} python=3.12 -y
+#    source activate ${conda_env_name}
+#
+#    sed -i "s/localhost/$ip_address/g" playwright.config.ts
+#
+##    conda install -c conda-forge nodejs -y
+#    npm install && npm ci && npx playwright install --with-deps
+#    node -v && npm -v && pip list
+#
+#    exit_status=0
+#    npx playwright test || exit_status=$?
+#
+#    if [ $exit_status -ne 0 ]; then
+#        echo "[TEST INFO]: ---------frontend test failed---------"
+#        exit $exit_status
+#    else
+#        echo "[TEST INFO]: ---------frontend test passed---------"
+#    fi
+#}
+
+function stop_docker() {
+    cd $WORKPATH/docker_compose/amd/gpu/rocm/
+    docker compose stop && docker compose rm -f
+}
+
+function main() {
+
+    stop_docker
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    start_services
+
+    validate_megaservice
+    # Frontend tests are currently disabled
+    # validate_frontend
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
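+
+# Typical local invocation (an assumed workflow; adjust paths and token to your environment):
+#   cd AudioQnA/tests
+#   export HUGGINGFACEHUB_API_TOKEN=<your token>
+#   bash test_compose_on_rocm.sh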