-
Notifications
You must be signed in to change notification settings - Fork 2
/
docker-compose.yaml
86 lines (83 loc) · 2.39 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Compose stack with three services:
#   inference - GPU-backed model server built from ./inference, published on 8000
#   backend   - application built from ./backend, published on host port 80
#   db        - MySQL database the backend connects to
#
# NOTE(review): the top-level `version` key is obsolete under the Compose
# Specification; it is kept here unchanged for older tooling.
version: "3"
services:
  # # Using vllm
  # inference:
  #   image: vllm/vllm-openai:latest
  #   # build:
  #   #   context: ./inference
  #   #   dockerfile: Dockerfile.vllm
  #   ports:
  #     - "8000:8000"
  #   deploy:
  #     resources:
  #       reservations:
  #         devices:
  #           - driver: nvidia
  #             device_ids: [ '0' ]
  #             # count: 1
  #             capabilities: [ gpu ]
  #   volumes:
  #     - ~/.cache/huggingface:/root/.cache/huggingface
  #     # Uncomment to use local model by mounting it inside the container
  #     # - ${MODELPATH}:/model
  #   environment:
  #     - HF_TOKEN=${HF_TOKEN}
  #     - MODEL=${MODEL}
  #   command: --model=${MODEL} --port=8000 --tensor-parallel-size=${NUM_GPUS:-1} ${EXTRA_INFERENCE_ARGS:-}
  #   shm_size: '2gb'
  # Using sglang:
  inference:
    build:
      context: ./inference
      dockerfile: Dockerfile.sglang
    ports:
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for the container.
            - driver: nvidia
              count: 1
              # Alternatively specify specific GPU device IDs
              # device_ids: [ '2' ]
              capabilities: [gpu]
    # In case you want to persist the HF cache, uncomment the following line.
    # volumes:
    #   - ~/.cache/huggingface:/root/.cache/huggingface
    environment:
      # Both values are substituted from the shell environment / .env file.
      - HF_TOKEN=${HF_TOKEN}
      - MODEL=${MODEL}
    shm_size: "2gb"
  backend:
    build:
      context: ./backend
      # We need to give an additional context here so the
      # backend can build the frontend code for static serving.
      additional_contexts:
        - frontend=./frontend
    environment:
      # Reaches the inference service by its compose service name.
      INFERENCE_API_URI: http://inference:8000/v1
      MODEL: ${MODEL}
      # Change here if you use an external database.
      # The credentials must match the MYSQL_* values of the db service.
      DATABASE_URL: mysql+pymysql://myuser:mypassword@db/mydb
    ports:
      # Host port 80 -> container port 8001.
      - "80:8001"
    # The following is important - if the backend starts before the DB
    # is ready, it can crash immediately.
    depends_on:
      db:
        condition: service_healthy
  db:
    # NOTE(review): `latest` floats across MySQL major releases; consider
    # pinning a specific tag so rebuilds are reproducible.
    image: mysql:latest
    environment:
      # You'll want to change these values in production.
      MYSQL_DATABASE: mydb
      MYSQL_USER: myuser
      MYSQL_PASSWORD: mypassword
      MYSQL_ROOT_PASSWORD: rootpassword
    volumes:
      # Named volume so the data survives the container being removed and
      # recreated; an anonymous volume would be orphaned in that case.
      # Data held in an earlier anonymous volume is not migrated automatically.
      - db_data:/var/lib/mysql
    healthcheck:
      # Ping over TCP (127.0.0.1) instead of the Unix socket. During first-run
      # initialisation the official image runs a temporary socket-only server,
      # so a socket ping can report "healthy" before the real server accepts
      # network connections from the backend.
      test: ["CMD", "mysqladmin", "ping", "-h", "127.0.0.1"]
      timeout: 20s
      retries: 10

volumes:
  # Storage for the MySQL data directory of the db service.
  db_data: {}