From 08fd917528f9c2518866db8093d0e2c319fb1219 Mon Sep 17 00:00:00 2001
From: sgurunat
Date: Mon, 11 Nov 2024 14:25:14 +0000
Subject: [PATCH] Updated compose_remote.yaml file to have vllm instead of tgi. Moved tgi related details to compose_tgi_remote.yaml file

Signed-off-by: sgurunat
---
 .../intel/hpu/gaudi/compose_remote.yaml       | 49 +++++++++-----
 .../intel/hpu/gaudi/compose_tgi_remote.yaml   | 66 +++----------------
 2 files changed, 41 insertions(+), 74 deletions(-)

diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
index 072140a0e..02c3f9882 100644
--- a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
@@ -36,6 +36,9 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
@@ -71,13 +74,16 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
     restart: unless-stopped
   llm:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-gaudi-server
+    image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
+    container_name: llm-vllm-gaudi-server
     ports:
       - "9000:9000"
     ipc: host
@@ -85,15 +91,12 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      MODEL_CONFIGS: ${MODEL_CONFIGS}
+      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
-    restart: unless-stopped
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
@@ -118,8 +121,8 @@ services:
     ipc: host
     restart: always
   llm_codegen:
-    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
-    container_name: llm-tgi-server-codegen
+    image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
+    container_name: llm-vllm-server-codegen
     ports:
       - "9001:9000"
     ipc: host
@@ -127,11 +130,15 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      vLLM_ENDPOINT: ${vLLM_ENDPOINT_CODEGEN}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
     restart: unless-stopped
   codegen-gaudi-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
@@ -148,7 +155,7 @@ services:
     ipc: host
     restart: always
   llm_faqgen:
-    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-faqgen-vllm:${TAG:-latest}
     container_name: llm-faqgen-server
     ports:
       - "9002:9000"
@@ -157,11 +164,15 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
+      vLLM_ENDPOINT: ${vLLM_ENDPOINT_FAQGEN}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
     restart: unless-stopped
   faqgen-gaudi-backend-server:
     image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
@@ -180,7 +191,7 @@ services:
     ipc: host
     restart: always
   llm_docsum_server:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
     container_name: llm-docsum-server
     ports:
       - "9003:9000"
@@ -189,11 +200,15 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
+      vLLM_ENDPOINT: ${vLLM_ENDPOINT_DOCSUM}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
       CLIENTID: ${CLIENTID}
       CLIENT_SECRET: ${CLIENT_SECRET}
       TOKEN_URL: ${TOKEN_URL}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
     restart: unless-stopped
   docsum-gaudi-backend-server:
     image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
@@ -259,7 +274,7 @@ services:
       - KEYCLOAK_ADMIN_PASSWORD=admin
       - KC_PROXY=edge
     ipc: host
-    command: 
+    command:
       - start-dev
     restart: always
   productivity-suite-gaudi-react-ui-server:
@@ -285,4 +300,4 @@ services:
     restart: always
 networks:
   default:
-    driver: bridge
+    driver: bridge
\ No newline at end of file
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
index a4fe71196..072140a0e 100644
--- a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
@@ -25,31 +25,9 @@ services:
       https_proxy: ${https_proxy}
       REDIS_URL: ${REDIS_URL}
       INDEX_NAME: ${INDEX_NAME}
-  tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
-    container_name: tei-embedding-gaudi-server
-    ports:
-      - "6006:80"
-    volumes:
-      - "./data_embedding:/data"
-    shm_size: 1g
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
-    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   embedding:
     image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
     container_name: embedding-tei-server
-    depends_on:
-      - tei-embedding-service
     ports:
       - "6000:6000"
     ipc: host
@@ -58,6 +36,9 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
     restart: unless-stopped
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -76,34 +57,9 @@ services:
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-  tei-reranking-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
-    container_name: tei-reranking-server
-    ports:
-      - "8808:80"
-    volumes:
-      - "./data_tei:/data"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
-    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   reranking:
     image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
     container_name: reranking-tei-gaudi-server
-    depends_on:
-      - tei-reranking-service
     ports:
       - "8000:8000"
     ipc: host
@@ -115,12 +71,13 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
     restart: unless-stopped
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-gaudi-server
-    depends_on:
-      - tgi_service
     ports:
       - "9000:9000"
     ipc: host
@@ -143,12 +100,9 @@ services:
     container_name: chatqna-gaudi-backend-server
     depends_on:
       - redis-vector-db
-      - tei-embedding-service
       - embedding
       - retriever
-      - tei-reranking-service
       - reranking
-      - tgi_service
       - llm
     ports:
       - "8888:8888"
@@ -182,8 +136,6 @@ services:
   codegen-gaudi-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-gaudi-backend-server
-    depends_on:
-      - llm
     ports:
       - "7778:7778"
     environment:
@@ -247,7 +199,6 @@ services:
     image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
     container_name: docsum-gaudi-backend-server
     depends_on:
-      - tgi_service
       - llm_docsum_server
     ports:
       - "8890:8888"
@@ -302,13 +253,14 @@ services:
     image: quay.io/keycloak/keycloak:25.0.2
     container_name: keycloak-server
     ports:
-      - 8081:8080
+      - 8080:8080
     environment:
      - KEYCLOAK_ADMIN=admin
      - KEYCLOAK_ADMIN_PASSWORD=admin
      - KC_PROXY=edge
     ipc: host
-    command: start-dev
+    command:
+      - start-dev
     restart: always
   productivity-suite-gaudi-react-ui-server:
     image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest}
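
---
Note (editor's sketch, not part of the commit): after this change, compose_remote.yaml no
longer starts TGI locally and instead reads vLLM-related variables from the environment.
The variable names below (vLLM_ENDPOINT, vLLM_ENDPOINT_CODEGEN, vLLM_ENDPOINT_FAQGEN,
vLLM_ENDPOINT_DOCSUM, LLM_MODEL_ID, HUGGINGFACEHUB_API_TOKEN) all come from the diff
above; the URLs, ports, and model ID are illustrative placeholders, so substitute the
addresses of your own remote vLLM serving endpoints:

    # Hypothetical values -- only the variable names are taken from the patch.
    export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
    export vLLM_ENDPOINT="http://<remote-vllm-host>:8008"          # llm (ChatQnA)
    export vLLM_ENDPOINT_CODEGEN="http://<remote-vllm-host>:8028"  # llm_codegen
    export vLLM_ENDPOINT_FAQGEN="http://<remote-vllm-host>:8038"   # llm_faqgen
    export vLLM_ENDPOINT_DOCSUM="http://<remote-vllm-host>:8048"   # llm_docsum_server
    export HUGGINGFACEHUB_API_TOKEN="<your HF token>"

    docker compose -f ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml up -d

The TGI-backed variant keeps its local tei/tgi service definitions removed here only
because they are assumed to run remotely; compose_tgi_remote.yaml remains the file to use
when the microservices should keep talking to TGI endpoints instead.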