From 39efb7ace5d44d4e402a80aa19febb29aa97ca5a Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Mon, 8 Apr 2024 18:15:05 +0200
Subject: [PATCH] kubernetes: update llama-cpp Helm chart templates

---
 examples/kubernetes/llama-cpp/Chart.yaml      |  2 +-
 .../llama-cpp/templates/deployment.yaml       | 74 ++++---------------
 .../kubernetes/llama-cpp/templates/jobs.yaml  | 21 +++++-
 .../kubernetes/llama-cpp/templates/pvc.yaml   |  6 +-
 .../llama-cpp/templates/service.yaml          |  2 +-
 5 files changed, 37 insertions(+), 68 deletions(-)

diff --git a/examples/kubernetes/llama-cpp/Chart.yaml b/examples/kubernetes/llama-cpp/Chart.yaml
index 13cd200182883..02cce93ef28c3 100644
--- a/examples/kubernetes/llama-cpp/Chart.yaml
+++ b/examples/kubernetes/llama-cpp/Chart.yaml
@@ -3,4 +3,4 @@ name: llama-cpp
 description: llama.cpp Helm chart for Kubernetes
 type: application
 version: 0.0.1
-appVersion: "cbbd1efa06f8c09f9dff58ff9d9af509cc4c152b"
+appVersion: "77d1ac7e00bf049b9f2bba1b5a310a78318c49c4"
diff --git a/examples/kubernetes/llama-cpp/templates/deployment.yaml b/examples/kubernetes/llama-cpp/templates/deployment.yaml
index 2fb09f79cc282..223b5dd8a7d68 100644
--- a/examples/kubernetes/llama-cpp/templates/deployment.yaml
+++ b/examples/kubernetes/llama-cpp/templates/deployment.yaml
@@ -33,44 +33,26 @@ spec:
       imagePullSecrets:
         {{- toYaml . | nindent 8 }}
       {{- end }}
-      securityContext:
-        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      initContainers:
-        - name: wait-model
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: {{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}-{{ .Values.images.downloader.tag }}
-          env:
-            - name: MODEL_PATH
-              value: {{ .Values.model.path }}
-            - name: MODEL_FILE
-              value: {{  regexReplaceAll "(.*/)?([^/]+).gguf" .Values.model.file "${2}.gguf" }}
-            - name: MODEL_SHA256
-              value: {{ .Values.model.sha256 }}
-            - name: MODEL_DOWNLOAD_REPO
-              value: {{ .Values.model.repo }}
-            - name: MODEL_DOWNLOAD_FILE
-              value: {{ .Values.model.file }}
-          command:
-            - sh
-            - -c
-          args:
-            - >
-              set -eux;
-              while ! echo "${MODEL_SHA256} *${MODEL_PATH}/${MODEL_FILE}" | sha256sum -c - ; do
-                echo "waiting for model file${MODEL_PATH}/${MODEL_FILE}=${MODEL_SHA256}";
-                sleep 1;
-              done
-          volumeMounts:
-            - mountPath: {{ .Values.model.path }}
-              name: models
-              readOnly: true
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
       containers:
         - name: {{ .Chart.Name }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.images.server.repository }}:{{ .Values.images.server.name }}-{{ .Values.images.server.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.images.pullPolicy }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
           command:
             - {{ .Values.server.command }}
           args:
@@ -105,21 +87,6 @@ spec:
             - name: http
               containerPort: {{ .Values.server.port }}
               protocol: TCP
-          startupProbe:
-            httpGet:
-              path: /health
-              port: {{ .Values.server.port }}
-
-          livenessProbe:
-            httpGet:
-              path: /health
-              port: {{ .Values.server.port }}
-
-          readinessProbe:
-            httpGet:
-              path: /health?fail_on_no_slot
-              port: {{ .Values.server.port }}
-
           {{- with .Values.volumeMounts }}
           volumeMounts:
             {{- toYaml . | nindent 12 }}
@@ -132,15 +99,4 @@ spec:
         - name: models
           persistentVolumeClaim:
             claimName: {{ include "server.llama.cpp.fullname" . }}
-      {{- with .Values.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
+            readOnly: true
diff --git a/examples/kubernetes/llama-cpp/templates/jobs.yaml b/examples/kubernetes/llama-cpp/templates/jobs.yaml
index 76942b0a7a7fd..9142bfbfa996d 100644
--- a/examples/kubernetes/llama-cpp/templates/jobs.yaml
+++ b/examples/kubernetes/llama-cpp/templates/jobs.yaml
@@ -1,13 +1,13 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name:  download-model-{{ include "server.llama.cpp.fullname" . }}
+  name: {{ include "server.llama.cpp.fullname" . }}-download-model  # release-scoped Job name
   labels:
     {{- include "server.llama.cpp.labels" . | nindent 4 }}
 spec:
   template:
     metadata:
-      name: download-model-{{ include "server.llama.cpp.fullname" . }}
+      name: {{ include "server.llama.cpp.fullname" . }}-download-model
       {{- with .Values.podAnnotations }}
       annotations:
         {{- toYaml . | nindent 8 }}
@@ -22,7 +22,7 @@ spec:
         - name: {{ include "server.llama.cpp.fullname" . }}-download-model
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
-          image: {{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}-{{ .Values.images.downloader.tag }}
+          image: "{{ .Values.images.downloader.repository }}:{{ .Values.images.downloader.name }}{{ with .Values.images.downloader.tag }}-{{ . }}{{ end }}"  # "-<tag>" suffix only when a tag is set
           env:
             - name: MODEL_PATH
               value: {{ .Values.model.path }}
@@ -50,4 +50,17 @@ spec:
       volumes:
         - name: models
           persistentVolumeClaim:
-            claimName: {{ include "server.llama.cpp.fullname" . }}
\ No newline at end of file
+            claimName: {{ include "server.llama.cpp.fullname" . }}
+            readOnly: false
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:  # emitted only when .Values.tolerations is non-empty
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/examples/kubernetes/llama-cpp/templates/pvc.yaml b/examples/kubernetes/llama-cpp/templates/pvc.yaml
index 681873c379598..e2c40e5acaea4 100644
--- a/examples/kubernetes/llama-cpp/templates/pvc.yaml
+++ b/examples/kubernetes/llama-cpp/templates/pvc.yaml
@@ -4,6 +4,8 @@ metadata:
   name: {{ include "server.llama.cpp.fullname" . }}
   labels:
     {{- include "server.llama.cpp.labels" . | nindent 4 }}
+  annotations:
+    helm.sh/resource-policy: "keep"
 spec:
   accessModes:
     - ReadWriteOnce
@@ -11,7 +13,5 @@ spec:
     requests:
       storage: {{ .Values.model.size | quote }}
 {{- if .Values.persistence.storageClass }}
-{{- if (not empty .Values.persistence.storageClass) }}
-  storageClassName: "{{ .Values.persistence.storageClass }}"
-{{- end }}
+  storageClassName: {{ .Values.persistence.storageClass | quote }}  # quoted so the class name always renders as a string
 {{- end }}
\ No newline at end of file
diff --git a/examples/kubernetes/llama-cpp/templates/service.yaml b/examples/kubernetes/llama-cpp/templates/service.yaml
index 8e5a222504c1c..09cab5f400968 100644
--- a/examples/kubernetes/llama-cpp/templates/service.yaml
+++ b/examples/kubernetes/llama-cpp/templates/service.yaml
@@ -8,7 +8,7 @@ spec:
   type: {{ .Values.service.type }}
   ports:
     - port: {{ .Values.service.port }}
-      targetPort: {{ .Values.service.port }}
+      targetPort: {{ .Values.server.port }}
       protocol: TCP
       name: http
   selector: