Skip to content

Commit

Permalink
Updated compose_remote.yaml file to have vllm instead of tgi. Moved tgi related details to compose_tgi_remote.yaml file
Browse files Browse the repository at this point in the history

Signed-off-by: sgurunat <[email protected]>
  • Loading branch information
sgurunat committed Nov 11, 2024
1 parent 740f07a commit 08fd917
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
Expand Down Expand Up @@ -71,29 +74,29 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
restart: unless-stopped
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-gaudi-server
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
MODEL_CONFIGS: ${MODEL_CONFIGS}
vLLM_ENDPOINT: ${vLLM_ENDPOINT}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}

restart: unless-stopped
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
Expand All @@ -118,20 +121,24 @@ services:
ipc: host
restart: always
llm_codegen:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server-codegen
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-server-codegen
ports:
- "9001:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
vLLM_ENDPOINT: ${vLLM_ENDPOINT_CODEGEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
LLM_MODEL_ID: ${LLM_MODEL_ID}
restart: unless-stopped
codegen-gaudi-backend-server:
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
Expand All @@ -148,7 +155,7 @@ services:
ipc: host
restart: always
llm_faqgen:
image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-faqgen-vllm:${TAG:-latest}
container_name: llm-faqgen-server
ports:
- "9002:9000"
Expand All @@ -157,11 +164,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
vLLM_ENDPOINT: ${vLLM_ENDPOINT_FAQGEN}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
LLM_MODEL_ID: ${LLM_MODEL_ID}
restart: unless-stopped
faqgen-gaudi-backend-server:
image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
Expand All @@ -180,7 +191,7 @@ services:
ipc: host
restart: always
llm_docsum_server:
image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
image: ${REGISTRY:-opea}/llm-docsum-vllm:${TAG:-latest}
container_name: llm-docsum-server
ports:
- "9003:9000"
Expand All @@ -189,11 +200,15 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
vLLM_ENDPOINT: ${vLLM_ENDPOINT_DOCSUM}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
LLM_MODEL_ID: ${LLM_MODEL_ID}
restart: unless-stopped
docsum-gaudi-backend-server:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
Expand Down Expand Up @@ -259,7 +274,7 @@ services:
- KEYCLOAK_ADMIN_PASSWORD=admin
- KC_PROXY=edge
ipc: host
command:
command:
- start-dev
restart: always
productivity-suite-gaudi-react-ui-server:
Expand All @@ -285,4 +300,4 @@ services:
restart: always
networks:
default:
driver: bridge
driver: bridge
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,9 @@ services:
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "6006:80"
volumes:
- "./data_embedding:/data"
shm_size: 1g
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
Expand All @@ -58,6 +36,9 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
Expand All @@ -76,34 +57,9 @@ services:
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "./data_tei:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
Expand All @@ -115,12 +71,13 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
CLIENTID: ${CLIENTID}
CLIENT_SECRET: ${CLIENT_SECRET}
TOKEN_URL: ${TOKEN_URL}
restart: unless-stopped
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi_service
ports:
- "9000:9000"
ipc: host
Expand All @@ -143,12 +100,9 @@ services:
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi_service
- llm
ports:
- "8888:8888"
Expand Down Expand Up @@ -182,8 +136,6 @@ services:
codegen-gaudi-backend-server:
image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
container_name: codegen-gaudi-backend-server
depends_on:
- llm
ports:
- "7778:7778"
environment:
Expand Down Expand Up @@ -247,7 +199,6 @@ services:
image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
container_name: docsum-gaudi-backend-server
depends_on:
- tgi_service
- llm_docsum_server
ports:
- "8890:8888"
Expand Down Expand Up @@ -302,13 +253,14 @@ services:
image: quay.io/keycloak/keycloak:25.0.2
container_name: keycloak-server
ports:
- 8081:8080
- 8080:8080
environment:
- KEYCLOAK_ADMIN=admin
- KEYCLOAK_ADMIN_PASSWORD=admin
- KC_PROXY=edge
ipc: host
command: start-dev
command:
- start-dev
restart: always
productivity-suite-gaudi-react-ui-server:
image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest}
Expand Down

0 comments on commit 08fd917

Please sign in to comment.