# Patch summary: llama.cpp Helm chart (examples/kubernetes/llama-cpp)
#
# Text before the first "diff --git" header is commentary only.
#
# Chart.yaml
#   * Bumps appVersion to a new commit SHA (kept quoted so it stays a string).
# templates/deployment.yaml
#   * Removes the pod-level securityContext and the "wait-model" init container.
#   * Moves nodeSelector / affinity / tolerations ahead of "containers".
#   * Adds container "resources" rendered from .Values.resources.
#   * Removes the startup, liveness and readiness probes.
#   * Marks the "models" PVC volume readOnly for the server pod.
#   NOTE(review): dropping all three probes and the pod securityContext changes
#   runtime behaviour, not just layout -- confirm this is intentional.
# templates/jobs.yaml
#   * Renames the Job to "<fullname>-download-model".
#   * Emits the "-" before the downloader image tag only when a tag is set.
#   * Sets readOnly: false on the PVC volume and adds nodeSelector / affinity /
#     tolerations to the Job pod.
# templates/pvc.yaml
#   * Adds the helm.sh/resource-policy: "keep" annotation.
#   * Collapses the redundant nested storageClass check into a single "if".
# templates/service.yaml
#   * Points targetPort at .Values.server.port (the container port) instead of
#     .Values.service.port.
#
# Review fixes applied relative to the patch as submitted:
#   1. pvc.yaml: storageClassName is rendered through "quote" again, so a class
#      name that looks like a boolean/number/null is not retyped by YAML.
#   2. jobs.yaml: the downloader image value is quoted, matching the server
#      image line in deployment.yaml.
#   3. jobs.yaml: the new file content now ends with a newline.
#   Because of 1-3 the post-image blob ids on the "index" lines of jobs.yaml and
#   pvc.yaml are those of the submitted patch, not of this revision.
#
# NOTE(review): leading indentation of context/removed lines was reconstructed
# from a whitespace-collapsed copy of this patch -- verify against the tree (or
# apply with --ignore-whitespace) before merging.
diff --git a/examples/kubernetes/llama-cpp/Chart.yaml b/examples/kubernetes/llama-cpp/Chart.yaml
index 13cd200182883..02cce93ef28c3 100644
--- a/examples/kubernetes/llama-cpp/Chart.yaml
+++ b/examples/kubernetes/llama-cpp/Chart.yaml
@@ -3,4 +3,4 @@ name: llama-cpp
 description: llama.cpp Helm chart for Kubernetes
 type: application
 version: 0.0.1
-appVersion: "cbbd1efa06f8c09f9dff58ff9d9af509cc4c152b"
+appVersion: "77d1ac7e00bf049b9f2bba1b5a310a78318c49c4"
diff --git a/examples/kubernetes/llama-cpp/templates/deployment.yaml b/examples/kubernetes/llama-cpp/templates/deployment.yaml
index 2fb09f79cc282..223b5dd8a7d68 100644
--- a/examples/kubernetes/llama-cpp/templates/deployment.yaml
+++ b/examples/kubernetes/llama-cpp/templates/deployment.yaml
@@ -33,44 +33,26 @@ spec:
       imagePullSecrets:
         {{- toYaml . | nindent 8 }}
       {{- end }}
-      securityContext:
-        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      initContainers:
-        - name: wait-model
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: {{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}-{{ .Values.images.downloader.tag }}
-          env:
-            - name: MODEL_PATH
-              value: {{ .Values.model.path }}
-            - name: MODEL_FILE
-              value: {{ regexReplaceAll "(.*/)?([^/]+).gguf" .Values.model.file "${2}.gguf" }}
-            - name: MODEL_SHA256
-              value: {{ .Values.model.sha256 }}
-            - name: MODEL_DOWNLOAD_REPO
-              value: {{ .Values.model.repo }}
-            - name: MODEL_DOWNLOAD_FILE
-              value: {{ .Values.model.file }}
-          command:
-            - sh
-            - -c
-          args:
-            - >
-              set -eux;
-              while ! echo "${MODEL_SHA256} *${MODEL_PATH}/${MODEL_FILE}" | sha256sum -c - ; do
-              echo "waiting for model file${MODEL_PATH}/${MODEL_FILE}=${MODEL_SHA256}";
-              sleep 1;
-              done
-          volumeMounts:
-            - mountPath: {{ .Values.model.path }}
-              name: models
-              readOnly: true
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.images.server.repository }}:{{ .Values.images.server.name }}-{{ .Values.images.server.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.images.pullPolicy }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
           command:
             - {{ .Values.server.command }}
           args:
@@ -105,21 +87,6 @@ spec:
             - name: http
               containerPort: {{ .Values.server.port }}
               protocol: TCP
-          startupProbe:
-            httpGet:
-              path: /health
-              port: {{ .Values.server.port }}
-
-          livenessProbe:
-            httpGet:
-              path: /health
-              port: {{ .Values.server.port }}
-
-          readinessProbe:
-            httpGet:
-              path: /health?fail_on_no_slot
-              port: {{ .Values.server.port }}
-
           {{- with .Values.volumeMounts }}
           volumeMounts:
             {{- toYaml . | nindent 12 }}
@@ -132,15 +99,4 @@ spec:
         - name: models
           persistentVolumeClaim:
             claimName: {{ include "server.llama.cpp.fullname" . }}
-      {{- with .Values.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
+            readOnly: true
diff --git a/examples/kubernetes/llama-cpp/templates/jobs.yaml b/examples/kubernetes/llama-cpp/templates/jobs.yaml
index 76942b0a7a7fd..9142bfbfa996d 100644
--- a/examples/kubernetes/llama-cpp/templates/jobs.yaml
+++ b/examples/kubernetes/llama-cpp/templates/jobs.yaml
@@ -1,13 +1,13 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: download-model-{{ include "server.llama.cpp.fullname" . }}
+  name: {{ include "server.llama.cpp.fullname" . }}-download-model
   labels:
     {{- include "server.llama.cpp.labels" . | nindent 4 }}
 spec:
   template:
     metadata:
-      name: download-model-{{ include "server.llama.cpp.fullname" . }}
+      name: {{ include "server.llama.cpp.fullname" . }}-download-model
       {{- with .Values.podAnnotations }}
       annotations:
         {{- toYaml . | nindent 8 }}
@@ -22,7 +22,7 @@ spec:
         - name: {{ include "server.llama.cpp.fullname" . }}-download-model
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
-          image: {{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}-{{ .Values.images.downloader.tag }}
+          image: "{{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}{{ if .Values.images.downloader.tag }}-{{ end }}{{ .Values.images.downloader.tag }}"
           env:
             - name: MODEL_PATH
               value: {{ .Values.model.path }}
@@ -50,4 +50,17 @@ spec:
       volumes:
         - name: models
           persistentVolumeClaim:
-            claimName: {{ include "server.llama.cpp.fullname" . }}
\ No newline at end of file
+            claimName: {{ include "server.llama.cpp.fullname" . }}
+            readOnly: false
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/examples/kubernetes/llama-cpp/templates/pvc.yaml b/examples/kubernetes/llama-cpp/templates/pvc.yaml
index 681873c379598..e2c40e5acaea4 100644
--- a/examples/kubernetes/llama-cpp/templates/pvc.yaml
+++ b/examples/kubernetes/llama-cpp/templates/pvc.yaml
@@ -4,6 +4,8 @@ metadata:
   name: {{ include "server.llama.cpp.fullname" . }}
   labels:
     {{- include "server.llama.cpp.labels" . | nindent 4 }}
+  annotations:
+    helm.sh/resource-policy: "keep"
 spec:
   accessModes:
     - ReadWriteOnce
@@ -11,7 +13,5 @@ spec:
     requests:
       storage: {{ .Values.model.size | quote }}
 {{- if .Values.persistence.storageClass }}
-{{- if (not empty .Values.persistence.storageClass) }}
-  storageClassName: "{{ .Values.persistence.storageClass }}"
-{{- end }}
+  storageClassName: {{ .Values.persistence.storageClass | quote }}
 {{- end }}
\ No newline at end of file
diff --git a/examples/kubernetes/llama-cpp/templates/service.yaml b/examples/kubernetes/llama-cpp/templates/service.yaml
index 8e5a222504c1c..09cab5f400968 100644
--- a/examples/kubernetes/llama-cpp/templates/service.yaml
+++ b/examples/kubernetes/llama-cpp/templates/service.yaml
@@ -8,7 +8,7 @@ spec:
   type: {{ .Values.service.type }}
   ports:
     - port: {{ .Values.service.port }}
-      targetPort: {{ .Values.service.port }}
+      targetPort: {{ .Values.server.port }}
       protocol: TCP
       name: http
   selector: