Align with new CodeGen workloads
The new CodeGen example uses MegaService; modify the helm chart accordingly.

Signed-off-by: Dolpher Du <[email protected]>
yongfengdu authored and jfding committed May 24, 2024
1 parent e8aa8c6 commit 6ae295d
Showing 34 changed files with 459 additions and 169 deletions.
2 changes: 1 addition & 1 deletion helm-charts/codegen/Chart.yaml
@@ -6,7 +6,7 @@ name: codegen
description: The Helm chart to deploy CodeGen
type: application
dependencies:
-  - name: tgi
+  - name: llm-uservice
version: "0.1.0"
version: 0.1.0
appVersion: "1.0.0"
20 changes: 9 additions & 11 deletions helm-charts/codegen/README.md
@@ -2,7 +2,7 @@

Helm chart for deploying CodeGen service.

- CodeGen depends on tgi, refer to tgi for more config details.
+ CodeGen depends on the LLM microservice; refer to llm-uservice for more configuration details.

## Installing the Chart

@@ -12,17 +12,15 @@ To install the chart, run the following:
$ export HFTOKEN="insert-your-huggingface-token-here"
$ export MODELDIR="/mnt"
$ export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
- $ helm install codegen codegen --set hfToken=${HFTOKEN} --set tgi.hftgi.volume=${MODELDIR} --set tgi.hftgi.modelId=${MODELNAME}
+ $ helm install codegen codegen --set llm-uservice.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set llm-uservice.tgi.volume=${MODELDIR} --set llm-uservice.tgi.LLM_MODEL_ID=${MODELNAME}
```
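After the install completes, a quick smoke test confirms the MegaService is reachable. This is a minimal sketch: the rendered service name follows the release name used above, and the `/v1/codegen` endpoint path and request payload are assumptions about the megaservice image, not something this chart defines.

```console
$ kubectl get pods -l app.kubernetes.io/instance=codegen
$ kubectl port-forward svc/codegen 6666:6666 &
$ curl http://localhost:6666/v1/codegen \
    -H 'Content-Type: application/json' \
    -d '{"messages": "Implement a bubble sort in Python."}'
```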

## Values

- | Key               | Type   | Default                               | Description                                                                                                                                |
- | ----------------- | ------ | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
- | hfToken           | string | `""`                                  | Your own Hugging Face API token                                                                                                            |
- | image.repository  | string | `"intel/gen-ai-examples"`             |                                                                                                                                            |
- | image.tag         | string | `"copilot"`                           |                                                                                                                                            |
- | service.port      | string | `"80"`                                |                                                                                                                                            |
- | tgi.hftgi.modelId | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory                                                                   |
- | tgi.hftgi.port    | string | `"80"`                                | Hugging Face Text Generation Inference service port                                                                                        |
- | tgi.hftgi.volume  | string | `"/mnt"`                              | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory   |
+ | Key                                   | Type   | Default                                          | Description                                                                                                                               |
+ | ------------------------------------- | ------ | ------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------ |
+ | image.repository                      | string | `"opea/gen-ai-comps:codegen-megaservice-server"` |                                                                                                                                           |
+ | service.port                          | string | `"6666"`                                         |                                                                                                                                           |
+ | llm-uservice.HUGGINGFACEHUB_API_TOKEN | string | `""`                                             | Your own Hugging Face API token                                                                                                           |
+ | llm-uservice.tgi.LLM_MODEL_ID         | string | `"ise-uiuc/Magicoder-S-DS-6.7B"`                 | Model id from https://huggingface.co/, or a pre-downloaded model directory                                                                |
+ | llm-uservice.tgi.volume               | string | `"/mnt"`                                         | Cached models directory; tgi will not download the model if it is already cached here. The volume is mounted into the container as /data  |
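For reference, the same overrides can be kept in a values file rather than repeated `--set` flags; a sketch using only keys from the table above:

```console
$ cat > codegen-values.yaml <<'EOF'
llm-uservice:
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  tgi:
    LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
    volume: /mnt
EOF
$ helm install codegen codegen -f codegen-values.yaml
```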
File renamed without changes.
13 changes: 13 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/Chart.yaml
@@ -0,0 +1,13 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
name: llm-uservice
description: The Helm chart for deploying llm as microservice
type: application
dependencies:
- name: tgi
version: "0.1.0"
version: 0.1.0
# The llm microservice server version
appVersion: "1.0.0"
27 changes: 27 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/README.md
@@ -0,0 +1,27 @@
# llm-uservice

Helm chart for deploying LLM microservice.

llm-uservice depends on TGI; refer to the tgi chart for more configuration details.

## Installing the Chart

To install the chart, run the following:

```console
$ export HFTOKEN="insert-your-huggingface-token-here"
$ export MODELDIR="/mnt"
$ export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
$ helm install llm llm-uservice --set HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME}
```
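Once the pod is ready, the microservice can be exercised through a port-forward. A minimal sketch; the rendered service name below follows the fullname template for a release named `llm`, and the endpoint path and request schema are assumptions about the llm-tgi-server image rather than values defined in this chart:

```console
$ kubectl port-forward svc/llm-llm-uservice 9000:9000 &
$ curl http://localhost:9000/v1/chat/completions \
    -H 'Content-Type: application/json' \
    -d '{"query": "Write a hello world program in C."}'
```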

## Values

| Key | Type | Default | Description |
| ------------------------ | ------ | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token |
| image.repository | string | `"opea/gen-ai-comps:llm-tgi-server"` | |
| service.port | string | `"9000"` | |
| tgi.LLM_MODEL_ID         | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Model id from https://huggingface.co/, or a pre-downloaded model directory                                                                 |
| tgi.port | string | `"80"` | Hugging Face Text Generation Inference service port |
| tgi.volume               | string | `"/mnt"`                              | Cached models directory; tgi will not download the model if it is already cached here. The volume is mounted into the container as /data  |
23 changes: 23 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
File renamed without changes.
32 changes: 32 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/charts/tgi/README.md
@@ -0,0 +1,32 @@
# tgi

Helm chart for deploying Hugging Face Text Generation Inference service.

## Installing the Chart

To install the chart, run the following:

```console
$ export MODELDIR=/mnt
$ export MODELNAME="bigscience/bloom-560m"
$ helm install tgi tgi --set volume=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME}
```

By default, the tgi service downloads the "bigscience/bloom-560m" model, which is about 1.1GB.

If the model is already cached locally, pass the cache directory as the volume and reference the model by its in-container path, for example:

```console
$ export MODELDIR=/home/ubuntu/hfmodels
$ export MODELNAME="/data/models--bigscience--bloom-560m"
```

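Once the model has finished loading, the service can be verified through TGI's generate API via a port-forward; a minimal sketch, assuming the release is named `tgi` as above:

```console
$ kubectl port-forward svc/tgi 8080:80 &
$ curl http://localhost:8080/generate \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "def fibonacci(n):", "parameters": {"max_new_tokens": 64}}'
```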
## Values

| Key | Type | Default | Description |
| ------------ | ------ | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- |
| LLM_MODEL_ID | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or a pre-downloaded model directory                                                                |
| port | string | `"80"` | Hugging Face Text Generation Inference service port |
| volume       | string | `"/mnt"`                                          | Cached models directory; tgi will not download the model if it is already cached here. The volume is mounted into the container as /data  |
| image | string | `"ghcr.io/huggingface/text-generation-inference"` | |
| tag | string | `"1.4"` | |
| service.port | string | `"80"` | The service port |
helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/deployment.yaml
@@ -31,9 +31,9 @@ spec:
- name: {{ .Chart.Name }}
env:
- name: MODEL_ID
-   value: {{ .Values.hftgi.modelId }}
+   value: {{ .Values.LLM_MODEL_ID }}
- name: PORT
-   value: {{ .Values.hftgi.port | quote }}
+   value: {{ .Values.port | quote }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
@@ -43,7 +43,7 @@ spec:
name: model-volume
ports:
- name: http
-   containerPort: 80
+   containerPort: {{ .Values.port }}
protocol: TCP
# livenessProbe:
# httpGet:
@@ -52,25 +52,25 @@
startupProbe:
httpGet:
path: /
-   port: http
+   port: {{ .Values.port }}
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
readinessProbe:
httpGet:
path: /
-   port: http
+   port: {{ .Values.port }}
initialDelaySeconds: 5
-   periodSeconds: 30
-   failureThreshold: 20
+   periodSeconds: 5
+   failureThreshold: 120
resources:
{{- toYaml .Values.resources | nindent 12 }}
# command:
# - "/usr/bin/bash"
volumes:
- name: model-volume
hostPath:
-   path: {{ .Values.hftgi.volume }}
+   path: {{ .Values.volume }}
type: Directory
{{- with .Values.nodeSelector }}
nodeSelector:
helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/service.yaml
@@ -11,7 +11,7 @@ spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
-   targetPort: {{ .Values.hftgi.port }}
+   targetPort: {{ .Values.port }}
protocol: TCP
name: tgi
selector:
helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml
@@ -7,11 +7,10 @@

replicaCount: 1

- hftgi:
-   modelId: bigscience/bloom-560m
-   # modelId: /data/OpenCodeInterpreter-DS-6.7B
-   port: 80
-   volume: /mnt
+ LLM_MODEL_ID: bigscience/bloom-560m
+ # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+ port: 80
+ volume: /mnt

image:
repository: ghcr.io/huggingface/text-generation-inference
16 changes: 16 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/templates/NOTES.txt
@@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
{{- if contains "NodePort" .Values.service.type }}
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llm-uservice.fullname" . }})
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
You can watch the status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llm-uservice.fullname" . }}'
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llm-uservice.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "llm-uservice.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 changes: 62 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/templates/_helpers.tpl
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "llm-uservice.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "llm-uservice.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "llm-uservice.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "llm-uservice.labels" -}}
helm.sh/chart: {{ include "llm-uservice.chart" . }}
{{ include "llm-uservice.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "llm-uservice.selectorLabels" -}}
app.kubernetes.io/name: {{ include "llm-uservice.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "llm-uservice.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "llm-uservice.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
74 changes: 74 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/templates/deployment.yaml
@@ -0,0 +1,74 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "llm-uservice.fullname" . }}
labels:
{{- include "llm-uservice.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "llm-uservice.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "llm-uservice.selectorLabels" . | nindent 8 }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Release.Name }}
env:
- name: TGI_LLM_ENDPOINT
value: "http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}"
- name: HUGGINGFACEHUB_API_TOKEN
value: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
startupProbe:
exec:
command:
- curl
- http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
# livenessProbe:
# httpGet:
# path: /
# port: 9000
# readinessProbe:
# httpGet:
# path: /
# port: 9000
resources:
{{- toYaml .Values.resources | nindent 12 }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
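For reference, with the tgi subchart's default service port of 80, the TGI_LLM_ENDPOINT wiring above renders like this for a release named llm (a sketch via helm template; output trimmed):

```console
$ helm template llm llm-uservice | grep -A1 TGI_LLM_ENDPOINT
        - name: TGI_LLM_ENDPOINT
          value: "http://llm-tgi:80"
```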
18 changes: 18 additions & 0 deletions helm-charts/codegen/charts/llm-uservice/templates/service.yaml
@@ -0,0 +1,18 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
name: {{ include "llm-uservice.fullname" . }}
labels:
{{- include "llm-uservice.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
{{- include "llm-uservice.selectorLabels" . | nindent 4 }}