diff --git a/helm-charts/common/guardrails-usvc/README.md b/helm-charts/common/guardrails-usvc/README.md
index b5656d04..8fcc6c49 100644
--- a/helm-charts/common/guardrails-usvc/README.md
+++ b/helm-charts/common/guardrails-usvc/README.md
@@ -47,11 +47,10 @@ curl http://localhost:9090/v1/guardrails \

 ## Values

-| Key                             | Type   | Default                              | Description                                                                                                                                                   |
-| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                                 | Your own Hugging Face API token                                                                                                                               |
-| global.modelUseHostPath         | string | `"/mnt/opea-models"`                 | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
-| image.repository                | string | `"opea/guardrails-usvc"`             |                                                                                                                                                               |
-| service.port                    | string | `"9090"`                             |                                                                                                                                                               |
-| SAFETY_GUARD_ENDPOINT           | string | `""`                                 | LLM endpoint                                                                                                                                                  |
-| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using                                                                                                              |
+| Key                             | Type   | Default                              | Description                                  |
+| ------------------------------- | ------ | ------------------------------------ | -------------------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                                 | Your own Hugging Face API token              |
+| image.repository                | string | `"opea/guardrails-usvc"`             |                                              |
+| service.port                    | string | `"9090"`                             |                                              |
+| SAFETY_GUARD_ENDPOINT           | string | `""`                                 | LLM endpoint                                 |
+| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID used by the underlying LLM service  |
diff --git a/helm-charts/common/speecht5/templates/configmap.yaml b/helm-charts/common/speecht5/templates/configmap.yaml
index d488542f..c153143f 100644
--- a/helm-charts/common/speecht5/templates/configmap.yaml
+++ b/helm-charts/common/speecht5/templates/configmap.yaml
@@ -18,3 +18,4 @@ data:
   HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
 {{- end }}
   HUGGINGFACE_HUB_CACHE: "/data"
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
diff --git a/helm-charts/common/speecht5/templates/deployment.yaml b/helm-charts/common/speecht5/templates/deployment.yaml
index e0ea6538..0db70e67 100644
--- a/helm-charts/common/speecht5/templates/deployment.yaml
+++ b/helm-charts/common/speecht5/templates/deployment.yaml
@@ -28,6 +28,40 @@ spec:
       serviceAccountName: {{ include "speecht5.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.TTS_MODEL_PATH) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "speecht5.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            {{- if hasKey .Values.securityContext "runAsGroup" }}
+            runAsGroup: {{ .Values.securityContext.runAsGroup }}
+            {{- end }}
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.TTS_MODEL_PATH | quote }};
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.VOCODER_MODEL | quote }};
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.TTS_MODEL_PATH }};
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.VOCODER_MODEL }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Release.Name }}
           envFrom:
@@ -39,11 +73,7 @@ spec:
                 optional: true
             {{- end }}
           securityContext:
-            {{- if .Values.global.modelUseHostPath }}
-            {}
-            {{- else }}
             {{- toYaml .Values.securityContext | nindent 12 }}
-            {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           {{- if .Values.image.pullPolicy }}
           imagePullPolicy: {{ .Values.image.pullPolicy }}
diff --git a/helm-charts/common/speecht5/values.yaml b/helm-charts/common/speecht5/values.yaml
index 4b8fd6eb..d6143306 100644
--- a/helm-charts/common/speecht5/values.yaml
+++ b/helm-charts/common/speecht5/values.yaml
@@ -8,7 +8,7 @@ replicaCount: 1

 TTS_MODEL_PATH: "microsoft/speecht5_tts"

-# VOCODE_MODEL: "microsoft/speecht5_hifigan"
+VOCODER_MODEL: "microsoft/speecht5_hifigan"

 image:
   repository: opea/speecht5
@@ -47,6 +47,9 @@ securityContext:
     - ALL
   seccompProfile:
     type: RuntimeDefault
+  # Init container sets the downloaded model dir to be group writable, so that container
+  # can keep its lock file there. This relies on both containers using the same group ID.
+  runAsGroup: 0

 service:
   type: ClusterIP
@@ -96,6 +99,7 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
   # service account name to be shared with all parent/child charts.
   # If set, it will overwrite serviceAccount.name.
   # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
diff --git a/helm-charts/common/tei/templates/configmap.yaml b/helm-charts/common/tei/templates/configmap.yaml
index 6b40613a..7f7c2530 100644
--- a/helm-charts/common/tei/templates/configmap.yaml
+++ b/helm-charts/common/tei/templates/configmap.yaml
@@ -32,3 +32,4 @@ data:
   {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
   MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
   {{- end }}
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
diff --git a/helm-charts/common/tei/templates/deployment.yaml b/helm-charts/common/tei/templates/deployment.yaml
index 0d10c015..10f13981 100644
--- a/helm-charts/common/tei/templates/deployment.yaml
+++ b/helm-charts/common/tei/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
       serviceAccountName: {{ include "tei.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.EMBEDDING_MODEL_ID) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "tei.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            {{- if hasKey .Values.securityContext "runAsGroup" }}
+            runAsGroup: {{ .Values.securityContext.runAsGroup }}
+            {{- end }}
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           envFrom:
@@ -42,11 +74,7 @@ spec:
                 optional: true
             {{- end }}
           securityContext:
-            {{- if .Values.global.modelUseHostPath }}
-            {}
-            {{- else }}
             {{- toYaml .Values.securityContext | nindent 12 }}
-            {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           {{- if .Values.image.pullPolicy }}
           imagePullPolicy: {{ .Values.image.pullPolicy }}
diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml
index 9d619387..547c0a91 100644
--- a/helm-charts/common/tei/values.yaml
+++ b/helm-charts/common/tei/values.yaml
@@ -61,6 +61,9 @@ securityContext:
     - ALL
   seccompProfile:
     type: RuntimeDefault
+  # Init container sets the downloaded model dir to be group writable, so that container
+  # can keep its lock file there. This relies on both containers using the same group ID.
+  runAsGroup: 0

 service:
   type: ClusterIP
@@ -110,6 +113,7 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
   # service account name to be shared with all parent/child charts.
   # If set, it will overwrite serviceAccount.name.
   # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
diff --git a/helm-charts/common/teirerank/templates/configmap.yaml b/helm-charts/common/teirerank/templates/configmap.yaml
index f8c25bc0..e1a047eb 100644
--- a/helm-charts/common/teirerank/templates/configmap.yaml
+++ b/helm-charts/common/teirerank/templates/configmap.yaml
@@ -32,3 +32,4 @@ data:
   {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
   MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
   {{- end }}
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
diff --git a/helm-charts/common/teirerank/templates/deployment.yaml b/helm-charts/common/teirerank/templates/deployment.yaml
index bb843889..3387fad0 100644
--- a/helm-charts/common/teirerank/templates/deployment.yaml
+++ b/helm-charts/common/teirerank/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
       serviceAccountName: {{ include "teirerank.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.RERANK_MODEL_ID) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "teirerank.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            {{- if hasKey .Values.securityContext "runAsGroup" }}
+            runAsGroup: {{ .Values.securityContext.runAsGroup }}
+            {{- end }}
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.RERANK_MODEL_ID }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           envFrom:
diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml
index 1b54d5ae..254f4d16 100644
--- a/helm-charts/common/teirerank/values.yaml
+++ b/helm-charts/common/teirerank/values.yaml
@@ -61,6 +61,9 @@ securityContext:
     - ALL
   seccompProfile:
     type: RuntimeDefault
+  # Init container sets the downloaded model dir to be group writable, so that container
+  # can keep its lock file there. This relies on both containers using the same group ID.
+  runAsGroup: 0

 service:
   type: ClusterIP
@@ -110,6 +113,7 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
   # service account name to be shared with all parent/child charts.
   # If set, it will overwrite serviceAccount.name.
   # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md
index 27acd96e..4d1d91e9 100644
--- a/helm-charts/common/tgi/README.md
+++ b/helm-charts/common/tgi/README.md
@@ -9,20 +9,20 @@ To install the chart, run the following:
 ```console
 cd GenAIInfra/helm-charts/common
 export MODELDIR=/mnt/opea-models
-export MODELNAME="bigscience/bloom-560m"
+export MODELNAME="Intel/neural-chat-7b-v3-3"
 export HFTOKEN="insert-your-huggingface-token-here"
 helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}

 # To deploy on Gaudi enabled kubernetes cluster
 # helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml
 ```

-By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB.
+By default, the tgi service will download the "Intel/neural-chat-7b-v3-3" model, which is about 54GB.

 If you already cached the model locally, you can pass it to container like this example:

 MODELDIR=/mnt/opea-models

-MODELNAME="/data/models--bigscience--bloom-560m"
+MODELNAME="/data/models--Intel--neural-chat-7b-v3-3"

 ## Verify
@@ -41,12 +41,10 @@ curl http://localhost:2080/generate \

 ## Values

-| Key                             | Type   | Default                                            | Description                                                                                                                                                                                                             |
-| ------------------------------- | ------ | -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| LLM_MODEL_ID                    | string | `"bigscience/bloom-560m"`                          | Models id from https://huggingface.co/, or predownloaded model directory |
-| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here`               | Hugging Face API token |
-| global.modelUseHostPath         | string | `"/mnt/opea-models"`                               | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
-| image.repository                | string | `"ghcr.io/huggingface/text-generation-inference"`  | |
-| image.tag                       | string | `"1.4"`                                            | |
-| autoscaling.enabled             | bool   | `false`                                            | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
-| global.monitoring               | bool   | `false`                                            | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
+| Key                             | Type   | Default                              | Description                                                                                                                                                                                                                 |
+| ------------------------------- | ------ | ------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"`        | Model ID from https://huggingface.co/, or a predownloaded model directory |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
+| global.modelUseHostPath         | string | `""`                                 | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty will force it to download the model. |
+| autoscaling.enabled             | bool   | `false`                              | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
+| global.monitoring               | bool   | `false`                              | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml
index 2411a5b6..66d62dda 100644
--- a/helm-charts/common/tgi/templates/deployment.yaml
+++ b/helm-charts/common/tgi/templates/deployment.yaml
@@ -31,6 +31,38 @@ spec:
       serviceAccountName: {{ include "tgi.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.LLM_MODEL_ID) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "tgi.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            {{- if hasKey .Values.securityContext "runAsGroup" }}
+            runAsGroup: {{ .Values.securityContext.runAsGroup }}
+            {{- end }}
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) $(MODEL_ID);
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           envFrom:
@@ -42,11 +74,7 @@ spec:
                 optional: true
             {{- end }}
           securityContext:
-            {{- if .Values.global.modelUseHostPath }}
-            {}
-            {{- else }}
             {{- toYaml .Values.securityContext | nindent 12 }}
-            {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           {{- if .Values.image.pullPolicy }}
           imagePullPolicy: {{ .Values.image.pullPolicy }}
diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml
index 827138d0..dff938e6 100644
--- a/helm-charts/common/tgi/values.yaml
+++ b/helm-charts/common/tgi/values.yaml
@@ -62,6 +62,9 @@ securityContext:
     - ALL
   seccompProfile:
     type: RuntimeDefault
+  # Init container sets the downloaded model dir to be group writable, so that container
+  # can keep its lock file there. This relies on both containers using the same group ID.
+  runAsGroup: 0

 service:
   type: ClusterIP
diff --git a/helm-charts/common/vllm/templates/deployment.yaml b/helm-charts/common/vllm/templates/deployment.yaml
index 63aba08f..71aef029 100644
--- a/helm-charts/common/vllm/templates/deployment.yaml
+++ b/helm-charts/common/vllm/templates/deployment.yaml
@@ -28,6 +28,35 @@ spec:
       serviceAccountName: {{ include "vllm.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.LLM_MODEL_ID) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "vllm.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.LLM_MODEL_ID | quote }};
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           envFrom:
@@ -39,11 +68,7 @@ spec:
                 optional: true
             {{- end }}
           securityContext:
-            {{- if .Values.global.modelUseHostPath }}
-            {}
-            {{- else }}
             {{- toYaml .Values.securityContext | nindent 12 }}
-            {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           {{- if .Values.image.pullPolicy }}
           imagePullPolicy: {{ .Values.image.pullPolicy }}
diff --git a/helm-charts/common/whisper/templates/configmap.yaml b/helm-charts/common/whisper/templates/configmap.yaml
index 2e27dea5..39ab3db4 100644
--- a/helm-charts/common/whisper/templates/configmap.yaml
+++ b/helm-charts/common/whisper/templates/configmap.yaml
@@ -18,3 +18,4 @@ data:
   HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
 {{- end }}
   HUGGINGFACE_HUB_CACHE: "/data"
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
diff --git a/helm-charts/common/whisper/templates/deployment.yaml b/helm-charts/common/whisper/templates/deployment.yaml
index 66b31dd3..4d4503d9 100644
--- a/helm-charts/common/whisper/templates/deployment.yaml
+++ b/helm-charts/common/whisper/templates/deployment.yaml
@@ -28,6 +28,38 @@ spec:
       serviceAccountName: {{ include "whisper.serviceAccountName" . }}
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      {{- if not (hasPrefix "/data/" .Values.ASR_MODEL_PATH) }}
+      initContainers:
+        - name: model-downloader
+          envFrom:
+            - configMapRef:
+                name: {{ include "whisper.fullname" . }}-config
+          securityContext:
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            {{- if hasKey .Values.securityContext "runAsGroup" }}
+            runAsGroup: {{ .Values.securityContext.runAsGroup }}
+            {{- end }}
+            capabilities:
+              drop:
+              - ALL
+              add:
+              - DAC_OVERRIDE
+              # To be able to make data model directory group writable for
+              # previously downloaded model by old versions of helm chart
+              - FOWNER
+            seccompProfile:
+              type: RuntimeDefault
+          image: huggingface/downloader:0.17.3
+          command: ['sh', '-c']
+          args:
+            - |
+              huggingface-cli download --cache-dir /data --token $(HF_TOKEN) {{ .Values.ASR_MODEL_PATH | quote }};
+              chmod -R g+w /data/models--{{ replace "/" "--" .Values.ASR_MODEL_PATH }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+      {{- end }}
       containers:
         - name: {{ .Release.Name }}
           envFrom:
@@ -39,11 +71,7 @@ spec:
                 optional: true
             {{- end }}
           securityContext:
-            {{- if .Values.global.modelUseHostPath }}
-            {}
-            {{- else }}
             {{- toYaml .Values.securityContext | nindent 12 }}
-            {{- end }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           {{- if .Values.image.pullPolicy }}
           imagePullPolicy: {{ .Values.image.pullPolicy }}
diff --git a/helm-charts/common/whisper/values.yaml b/helm-charts/common/whisper/values.yaml
index 680dc3c5..74c101e6 100644
--- a/helm-charts/common/whisper/values.yaml
+++ b/helm-charts/common/whisper/values.yaml
@@ -46,6 +46,9 @@ securityContext:
     - ALL
   seccompProfile:
     type: RuntimeDefault
+  # Init container sets the downloaded model dir to be group writable, so that container
+  # can keep its lock file there. This relies on both containers using the same group ID.
+  runAsGroup: 0

 service:
   type: ClusterIP
@@ -95,6 +98,7 @@ global:
   http_proxy: ""
   https_proxy: ""
   no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
   # service account name to be shared with all parent/child charts.
   # If set, it will overwrite serviceAccount.name.
   # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
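
Reviewer note: a minimal sketch for sanity-checking the new model-downloader init container after installing one of these charts (tgi shown here, following its README; the `app.kubernetes.io/name=tgi` label selector is an assumption based on standard Helm chart labels, not something this patch adds):

```console
# Install as in the tgi README; any LLM_MODEL_ID that does not start with /data/
# causes the chart to render the model-downloader initContainer.
helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}

# The pod should report Init:0/1 until the download into the shared /data volume finishes.
kubectl get pods -l app.kubernetes.io/name=tgi
kubectl logs -l app.kubernetes.io/name=tgi -c model-downloader --tail=20
```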