Skip to content

Commit

Permalink
Add model-downloader (#613)
Browse files Browse the repository at this point in the history
Add initContainer to download the model, to avoid the permission conflict
issue with the .locks directory in the host path given by modelUseHostPath
when the host model directory is shared between different services.

Also always set securityContext in all cases.

Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao authored Dec 11, 2024
1 parent 56674fc commit 97a9450
Show file tree
Hide file tree
Showing 17 changed files with 232 additions and 41 deletions.
15 changes: 7 additions & 8 deletions helm-charts/common/guardrails-usvc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ curl http://localhost:9090/v1/guardrails \

## Values

| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token |
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
| image.repository | string | `"opea/guardrails-usvc"` | |
| service.port | string | `"9090"` | |
| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint |
| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using |
| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------ |
| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token |
| image.repository | string | `"opea/guardrails-usvc"` | |
| service.port | string | `"9090"` | |
| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint |
| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID the underlying LLM service is using     |
1 change: 1 addition & 0 deletions helm-charts/common/speecht5/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ data:
HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
{{- end }}
HUGGINGFACE_HUB_CACHE: "/data"
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
38 changes: 34 additions & 4 deletions helm-charts/common/speecht5/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,40 @@ spec:
serviceAccountName: {{ include "speecht5.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.TTS_MODEL_PATH) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "speecht5.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.TTS_MODEL_PATH | quote }};
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.VOCODER_MODEL | quote }};
chmod -R g+w /data/models--{{ replace "/" "--" .Values.TTS_MODEL_PATH }};
chmod -R g+w /data/models--{{ replace "/" "--" .Values.VOCODER_MODEL }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
containers:
- name: {{ .Release.Name }}
envFrom:
Expand All @@ -39,11 +73,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
Expand Down
6 changes: 5 additions & 1 deletion helm-charts/common/speecht5/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
replicaCount: 1

TTS_MODEL_PATH: "microsoft/speecht5_tts"
# VOCODE_MODEL: "microsoft/speecht5_hifigan"
VOCODER_MODEL: "microsoft/speecht5_hifigan"

image:
repository: opea/speecht5
Expand Down Expand Up @@ -47,6 +47,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
Expand Down Expand Up @@ -96,6 +99,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
Expand Down
1 change: 1 addition & 0 deletions helm-charts/common/tei/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ data:
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
36 changes: 32 additions & 4 deletions helm-charts/common/tei/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "tei.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.EMBEDDING_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "tei.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
containers:
- name: {{ .Chart.Name }}
envFrom:
Expand All @@ -42,11 +74,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
Expand Down
4 changes: 4 additions & 0 deletions helm-charts/common/tei/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
Expand Down Expand Up @@ -110,6 +113,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
Expand Down
1 change: 1 addition & 0 deletions helm-charts/common/teirerank/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ data:
{{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
{{- end }}
HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
32 changes: 32 additions & 0 deletions helm-charts/common/teirerank/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "teirerank.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.RERANK_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "teirerank.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.RERANK_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
containers:
- name: {{ .Chart.Name }}
envFrom:
Expand Down
4 changes: 4 additions & 0 deletions helm-charts/common/teirerank/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
Expand Down Expand Up @@ -110,6 +113,7 @@ global:
http_proxy: ""
https_proxy: ""
no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
# service account name to be shared with all parent/child charts.
# If set, it will overwrite serviceAccount.name.
# If set, and serviceAccount.create is false, it will assume this service account is already created by others.
Expand Down
22 changes: 10 additions & 12 deletions helm-charts/common/tgi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ To install the chart, run the following:
```console
cd GenAIInfra/helm-charts/common
export MODELDIR=/mnt/opea-models
export MODELNAME="bigscience/bloom-560m"
export MODELNAME="Intel/neural-chat-7b-v3-3"
export HFTOKEN="insert-your-huggingface-token-here"
helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
# To deploy on Gaudi enabled kubernetes cluster
# helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml
```

By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB.
By default, the tgi service will download the "Intel/neural-chat-7b-v3-3" model, which is about 54GB.

If you already cached the model locally, you can pass it to container like this example:

MODELDIR=/mnt/opea-models

MODELNAME="/data/models--bigscience--bloom-560m"
MODELNAME="/data/models--Intel--neural-chat-7b-v3-3"

## Verify

Expand All @@ -41,12 +41,10 @@ curl http://localhost:2080/generate \

## Values

| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| LLM_MODEL_ID | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory |
| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | |
| image.tag | string | `"1.4"` | |
| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory |
| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
| global.modelUseHostPath | string | `""` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
36 changes: 32 additions & 4 deletions helm-charts/common/tgi/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,38 @@ spec:
serviceAccountName: {{ include "tgi.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
{{- if not (hasPrefix "/data/" .Values.LLM_MODEL_ID) }}
initContainers:
- name: model-downloader
envFrom:
- configMapRef:
name: {{ include "tgi.fullname" . }}-config
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
{{- if hasKey .Values.securityContext "runAsGroup" }}
runAsGroup: {{ .Values.securityContext.runAsGroup }}
{{- end }}
capabilities:
drop:
- ALL
add:
- DAC_OVERRIDE
# To be able to make data model directory group writable for
# previously downloaded model by old versions of helm chart
- FOWNER
seccompProfile:
type: RuntimeDefault
image: huggingface/downloader:0.17.3
command: ['sh', '-c']
args:
- |
huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}
volumeMounts:
- mountPath: /data
name: model-volume
{{- end }}
containers:
- name: {{ .Chart.Name }}
envFrom:
Expand All @@ -42,11 +74,7 @@ spec:
optional: true
{{- end }}
securityContext:
{{- if .Values.global.modelUseHostPath }}
{}
{{- else }}
{{- toYaml .Values.securityContext | nindent 12 }}
{{- end }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
{{- if .Values.image.pullPolicy }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
Expand Down
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ securityContext:
- ALL
seccompProfile:
type: RuntimeDefault
# Init container sets the downloaded model dir to be group writable, so that container
# can keep its lock file there. This relies on both containers using the same group ID.
runAsGroup: 0

service:
type: ClusterIP
Expand Down
Loading

0 comments on commit 97a9450

Please sign in to comment.