From 26a329ff0ba064957952cf471b5c02bff89a310a Mon Sep 17 00:00:00 2001 From: artem-astafev Date: Wed, 20 Nov 2024 11:36:13 +0700 Subject: [PATCH] Added docker compose example for AMD ROCm deployment Signed-off-by: artem-astafev --- .../docker_compose/amd/gpu/rocm/compose.yaml | 94 +++++++++++++++++++ .../gpu/rocm/launch_agent_service_tgi_rocm.sh | 49 ++++++++++ .../docker_compose/amd/gpu/rocm/set_env.sh | 47 ++++++++++ ...a_launch_and_validate_agent_tgi_on_rocm.sh | 76 +++++++++++++++ AgentQnA/tests/test_compose_on_rocm.sh | 75 +++++++++++++++ 5 files changed, 341 insertions(+) create mode 100644 AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml create mode 100644 AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh create mode 100644 AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh create mode 100644 AgentQnA/tests/step4a_launch_and_validate_agent_tgi_on_rocm.sh create mode 100644 AgentQnA/tests/test_compose_on_rocm.sh diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml new file mode 100644 index 000000000..a7f7c6673 --- /dev/null +++ b/AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -0,0 +1,94 @@ +services: + agent-tgi-server: + image: ${AGENTQNA_TGI_IMAGE} + container_name: agent-tgi-server + ports: + - "${AGENTQNA_TGI_SERVICE_PORT-8085}:80" + volumes: + - /var/opea/agent-service/:/data + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: "http://${HOST_IP}:${AGENTQNA_TGI_SERVICE_PORT}" + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + shm_size: 1g + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/${AGENTQNA_CARD_ID}:/dev/dri/${AGENTQNA_CARD_ID} + - /dev/dri/${AGENTQNA_RENDER_ID}:/dev/dri/${AGENTQNA_RENDER_ID} + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + ipc: host + command: --model-id ${LLM_MODEL_ID} 
--max-input-length 4096 --max-total-tokens 8192 + + worker-rag-agent: + image: opea/agent-langchain:latest + container_name: rag-agent-endpoint + volumes: + # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ + - ${TOOLSET_PATH}:/home/user/tools/ + ports: + - "9095:9095" + ipc: host + environment: + ip_address: ${ip_address} + strategy: rag_agent_llama + recursion_limit: ${recursion_limit_worker} + llm_engine: tgi + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT_URL} + model: ${LLM_MODEL_ID} + temperature: ${temperature} + max_new_tokens: ${max_new_tokens} + streaming: false + tools: /home/user/tools/worker_agent_tools.yaml + require_human_feedback: false + RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL} + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-worker-agent-service" + port: 9095 + + supervisor-react-agent: + image: opea/agent-langchain:latest + container_name: react-agent-endpoint + depends_on: + - agent-tgi-server + - worker-rag-agent + volumes: + # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/ + - ${TOOLSET_PATH}:/home/user/tools/ + ports: + - "${AGENTQNA_FRONTEND_PORT}:9090" + ipc: host + environment: + ip_address: ${ip_address} + strategy: react_langgraph + recursion_limit: ${recursion_limit_supervisor} + llm_engine: tgi + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + llm_endpoint_url: ${LLM_ENDPOINT_URL} + model: ${LLM_MODEL_ID} + temperature: ${temperature} + max_new_tokens: ${max_new_tokens} + streaming: false + tools: /home/user/tools/supervisor_agent_tools.yaml + require_human_feedback: false + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LANGCHAIN_API_KEY: 
${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-supervisor-agent-service" + CRAG_SERVER: $CRAG_SERVER + WORKER_AGENT_URL: $WORKER_AGENT_URL + port: 9090 diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh new file mode 100644 index 000000000..623d37372 --- /dev/null +++ b/AgentQnA/docker_compose/amd/gpu/rocm/launch_agent_service_tgi_rocm.sh @@ -0,0 +1,49 @@ +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +WORKPATH=$(dirname "$PWD")/.. +export ip_address=${host_ip} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm +export AGENTQNA_TGI_SERVICE_PORT="8085" + +# LLM related environment variables +export AGENTQNA_CARD_ID="card1" +export AGENTQNA_RENDER_ID="renderD136" +export HF_CACHE_DIR=${HF_CACHE_DIR} +ls $HF_CACHE_DIR +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +#export NUM_SHARDS=4 +export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}" +export temperature=0.01 +export max_new_tokens=512 + +# agent related environment variables +export AGENTQNA_WORKER_AGENT_SERVICE_PORT="9095" +export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ +echo "TOOLSET_PATH=${TOOLSET_PATH}" +export recursion_limit_worker=12 +export recursion_limit_supervisor=10 +export WORKER_AGENT_URL="http://${ip_address}:${AGENTQNA_WORKER_AGENT_SERVICE_PORT}/v1/chat/completions" +export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool" +export CRAG_SERVER=http://${ip_address}:18881 + +export AGENTQNA_FRONTEND_PORT="9090" + +#retrieval_tool +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export REDIS_URL="redis://${host_ip}:26379" +export INDEX_NAME="rag-redis" +export
MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" + +docker compose -f compose.yaml up -d + + diff --git a/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh b/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh new file mode 100644 index 000000000..d7bdb4d5b --- /dev/null +++ b/AgentQnA/docker_compose/amd/gpu/rocm/set_env.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +WORKPATH=$(dirname "$PWD")/.. +export ip_address=${host_ip} +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export AGENTQNA_TGI_IMAGE=ghcr.io/huggingface/text-generation-inference:2.3.1-rocm +export AGENTQNA_TGI_SERVICE_PORT="19001" + +# LLM related environment variables +export AGENTQNA_CARD_ID="card1" +export AGENTQNA_RENDER_ID="renderD136" +export HF_CACHE_DIR=${HF_CACHE_DIR} +ls $HF_CACHE_DIR +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" +export NUM_SHARDS=4 +export LLM_ENDPOINT_URL="http://${ip_address}:${AGENTQNA_TGI_SERVICE_PORT}" +export temperature=0.01 +export max_new_tokens=512 + +# agent related environment variables +export AGENTQNA_WORKER_AGENT_SERVICE_PORT="9095" +export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ +echo "TOOLSET_PATH=${TOOLSET_PATH}" +export recursion_limit_worker=12 +export recursion_limit_supervisor=10 +export WORKER_AGENT_URL="http://${ip_address}:${AGENTQNA_WORKER_AGENT_SERVICE_PORT}/v1/chat/completions" +export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool" +export CRAG_SERVER=http://${ip_address}:18881 + +export 
AGENTQNA_FRONTEND_PORT="15557" + +#retrieval_tool +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export REDIS_URL="redis://${host_ip}:26379" +export INDEX_NAME="rag-redis" +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" diff --git a/AgentQnA/tests/step4a_launch_and_validate_agent_tgi_on_rocm.sh b/AgentQnA/tests/step4a_launch_and_validate_agent_tgi_on_rocm.sh new file mode 100644 index 000000000..5b90aa41f --- /dev/null +++ b/AgentQnA/tests/step4a_launch_and_validate_agent_tgi_on_rocm.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -ex + +WORKPATH=$(dirname "$PWD") +export WORKDIR=$WORKPATH/../../ +echo "WORKDIR=${WORKDIR}" +export ip_address=$(hostname -I | awk '{print $1}') +export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + +export HF_CACHE_DIR=$WORKDIR/hf_cache +if [ ! 
-d "$HF_CACHE_DIR" ]; then + mkdir -p "$HF_CACHE_DIR" +fi +ls $HF_CACHE_DIR + + +function start_agent_and_api_server() { + echo "Starting CRAG server" + docker run -d --runtime=runc --name=kdd-cup-24-crag-service -p=18881:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0 + + echo "Starting Agent services" + cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/amd/gpu/rocm + bash launch_agent_service_tgi_rocm.sh +} + +function validate() { + local CONTENT="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected: $CONTENT" + echo 0 + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo 1 + fi +} + +function validate_agent_service() { + echo "----------------Test agent ----------------" + local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + "query": "Tell me about Michael Jackson song thriller" + }') + local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint") + docker logs rag-agent-endpoint + if [ "$EXIT_CODE" == "1" ]; then + exit 1 + fi + + local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{ + "query": "Tell me about Michael Jackson song thriller" + }') + local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint") + docker logs react-agent-endpoint + if [ "$EXIT_CODE" == "1" ]; then + exit 1 + fi + +} + +function main() { + echo "==================== Start agent ====================" + start_agent_and_api_server + echo "==================== Agent started ====================" + + echo "==================== Validate agent service ====================" + validate_agent_service + echo "==================== Agent service validated ====================" +} + +main diff --git a/AgentQnA/tests/test_compose_on_rocm.sh
b/AgentQnA/tests/test_compose_on_rocm.sh new file mode 100644 index 000000000..204de7ead --- /dev/null +++ b/AgentQnA/tests/test_compose_on_rocm.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Copyright (C) 2024 Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 + +set -e + +WORKPATH=$(dirname "$PWD") +export WORKDIR=$WORKPATH/../../ +echo "WORKDIR=${WORKDIR}" +export ip_address=$(hostname -I | awk '{print $1}') +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/ + +function stop_crag() { + cid=$(docker ps -aq --filter "name=kdd-cup-24-crag-service") + echo "Stopping container kdd-cup-24-crag-service with cid $cid" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi +} + +function stop_agent_docker() { + cd $WORKPATH/docker_compose/amd/gpu/rocm + # docker compose -f compose.yaml down + container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + echo "Stopping container $container_name" + if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + done +} + +function stop_retrieval_tool() { + echo "Stopping Retrieval tool" + local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever + cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/ + # docker compose -f compose.yaml down + container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2) + for container_name in $container_list; do + cid=$(docker ps -aq --filter "name=$container_name") + echo "Stopping container $container_name" + if [[ ! 
-z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi + done +} +echo "workpath: $WORKPATH" +echo "=================== Stop containers ====================" +stop_crag +stop_agent_docker +stop_retrieval_tool + +cd $WORKPATH/tests + +echo "=================== #1 Building docker images====================" +bash step1_build_images.sh +echo "=================== #1 Building docker images completed====================" + +echo "=================== #2 Start retrieval tool====================" +bash step2_start_retrieval_tool.sh +echo "=================== #2 Retrieval tool started====================" + +echo "=================== #3 Ingest data and validate retrieval====================" +bash step3_ingest_data_and_validate_retrieval.sh +echo "=================== #3 Data ingestion and validation completed====================" + +echo "=================== #4 Start agent and API server====================" +bash step4a_launch_and_validate_agent_tgi_on_rocm.sh +echo "=================== #4 Agent test passed ====================" + +echo "=================== #5 Stop agent and API server====================" +stop_crag +stop_agent_docker +stop_retrieval_tool +echo "=================== #5 Agent and API server stopped====================" + +echo y | docker system prune + +echo "ALL DONE!"