diff --git a/helm-charts/codegen/Chart.yaml b/helm-charts/codegen/Chart.yaml
index a8ef18ab..785e332e 100644
--- a/helm-charts/codegen/Chart.yaml
+++ b/helm-charts/codegen/Chart.yaml
@@ -6,7 +6,7 @@ name: codegen
 description: The Helm chart to deploy CodeGen
 type: application
 dependencies:
-  - name: tgi
+  - name: llm-uservice
    version: "0.1.0"
 version: 0.1.0
 appVersion: "1.0.0"
diff --git a/helm-charts/codegen/README.md b/helm-charts/codegen/README.md
index fda0bcca..4b4f8d1e 100644
--- a/helm-charts/codegen/README.md
+++ b/helm-charts/codegen/README.md
@@ -2,7 +2,7 @@
 
 Helm chart for deploying CodeGen service.
 
-CodeGen depends on tgi, refer to tgi for more config details.
+CodeGen depends on the LLM microservice; refer to llm-uservice for more config details.
 
 ## Installing the Chart
 
@@ -12,17 +12,15 @@ To install the chart, run the following:
 $ export HFTOKEN="insert-your-huggingface-token-here"
 $ export MODELDIR="/mnt"
 $ export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B"
-$ helm install codegen codegen --set hfToken=${HFTOKEN} --set tgi.hftgi.volume=${MODELDIR} --set tgi.hftgi.modelId=${MODELNAME}
+$ helm install codegen codegen --set llm-uservice.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set llm-uservice.tgi.volume=${MODELDIR} --set llm-uservice.tgi.LLM_MODEL_ID=${MODELNAME}
 ```
 
 ## Values
 
-| Key               | Type   | Default                               | Description                                                                                                                               |
-| ----------------- | ------ | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
-| hfToken           | string | `""`                                  | Your own Hugging Face API token                                                                                                           |
-| image.repository  | string | `"intel/gen-ai-examples"`             |                                                                                                                                           |
-| image.tag         | string | `"copilot"`                           |                                                                                                                                           |
-| service.port      | string | `"80"`                                |                                                                                                                                           |
-| tgi.hftgi.modelId | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory                                                                  |
-| tgi.hftgi.port    | string | `"80"`                                | Hugging Face Text Generation Inference service port                                                                                      |
-| tgi.hftgi.volume  | string | `"/mnt"`                              | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory |
+| Key                                   | Type   | Default                                          | Description                                                                                                                                           |
+| ------------------------------------- | ------ | ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| image.repository                      | string | `"opea/gen-ai-comps:codegen-megaservice-server"` |                                                                                                                                                       |
+| service.port                          | string | `"6666"`                                         |                                                                                                                                                       |
+| llm-uservice.HUGGINGFACEHUB_API_TOKEN | string | `""`                                             | Your own Hugging Face API token                                                                                                                       |
+| llm-uservice.tgi.LLM_MODEL_ID         | string | `"ise-uiuc/Magicoder-S-DS-6.7B"`                 | Model id from https://huggingface.co/, or a predownloaded model directory                                                                             |
+| llm-uservice.tgi.volume               | string | `"/mnt"`                                         | Cached models directory; tgi will not download the model if it is already cached here. The "volume" directory is mounted into the container as /data |
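Note: with the chart dependency renamed from tgi to llm-uservice, locally vendored charts need their dependency metadata rebuilt before the install command above will resolve. A minimal sketch, assuming the charts are installed straight from this source tree:

```console
$ cd helm-charts
$ helm dependency update codegen
$ helm lint codegen
```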
The "volume" will be mounted to container as /data directory | diff --git a/helm-charts/codegen/charts/tgi/.helmignore b/helm-charts/codegen/charts/llm-uservice/.helmignore similarity index 100% rename from helm-charts/codegen/charts/tgi/.helmignore rename to helm-charts/codegen/charts/llm-uservice/.helmignore diff --git a/helm-charts/codegen/charts/llm-uservice/Chart.yaml b/helm-charts/codegen/charts/llm-uservice/Chart.yaml new file mode 100644 index 00000000..68b49c3e --- /dev/null +++ b/helm-charts/codegen/charts/llm-uservice/Chart.yaml @@ -0,0 +1,13 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: llm-uservice +description: The Helm chart for deploying llm as microservice +type: application +dependencies: + - name: tgi + version: "0.1.0" +version: 0.1.0 +# The llm microservice server version +appVersion: "1.0.0" diff --git a/helm-charts/codegen/charts/llm-uservice/README.md b/helm-charts/codegen/charts/llm-uservice/README.md new file mode 100644 index 00000000..b80ae5c3 --- /dev/null +++ b/helm-charts/codegen/charts/llm-uservice/README.md @@ -0,0 +1,27 @@ +# llm-uservice + +Helm chart for deploying LLM microservice. + +llm-uservice depends on TGI, refer to tgi for more config details. + +## Installing the Chart + +To install the chart, run the following: + +```console +$ export HFTOKEN="insert-your-huggingface-token-here" +$ export MODELDIR="/mnt" +$ export MODELNAME="m-a-p/OpenCodeInterpreter-DS-6.7B" +$ helm install llm llm-uservice --set HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.volume=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------ | ------ | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/gen-ai-comps:llm-tgi-server"` | | +| service.port | string | `"9000"` | | +| tgi.LLM_MODEL_ID | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory | +| tgi.port | string | `"80"` | Hugging Face Text Generation Inference service port | +| tgi.volume | string | `"/mnt"` | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory | diff --git a/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore b/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore new file mode 100644 index 00000000..0e8a0eb3 --- /dev/null +++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
diff --git a/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore b/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore
new file mode 100644
index 00000000..0e8a0eb3
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/codegen/charts/tgi/Chart.yaml b/helm-charts/codegen/charts/llm-uservice/charts/tgi/Chart.yaml
similarity index 100%
rename from helm-charts/codegen/charts/tgi/Chart.yaml
rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/Chart.yaml
diff --git a/helm-charts/codegen/charts/llm-uservice/charts/tgi/README.md b/helm-charts/codegen/charts/llm-uservice/charts/tgi/README.md
new file mode 100644
index 00000000..36b3f996
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/README.md
@@ -0,0 +1,32 @@
+# tgi
+
+Helm chart for deploying Hugging Face Text Generation Inference service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+$ export MODELDIR=/mnt
+$ export MODELNAME="bigscience/bloom-560m"
+$ helm install tgi tgi --set volume=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME}
+```
+
+By default, the tgi service will download "bigscience/bloom-560m", which is about 1.1GB.
+
+If you have already cached the model locally, you can pass it to the container as in this example:
+
+MODELDIR=/home/ubuntu/hfmodels
+
+MODELNAME="/data/models--bigscience--bloom-560m"
+
+## Values
+
+| Key          | Type   | Default                                           | Description                                                                                                                                           |
+| ------------ | ------ | ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| LLM_MODEL_ID | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or a predownloaded model directory                                                                             |
+| port         | string | `"80"`                                            | Hugging Face Text Generation Inference service port                                                                                                  |
+| volume       | string | `"/mnt"`                                          | Cached models directory; tgi will not download the model if it is already cached here. The "volume" directory is mounted into the container as /data |
+| image        | string | `"ghcr.io/huggingface/text-generation-inference"` |                                                                                                                                                       |
+| tag          | string | `"1.4"`                                           |                                                                                                                                                       |
+| service.port | string | `"80"`                                            | The service port                                                                                                                                      |
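The cached-model case described in the README above can be spelled out end to end. A sketch using the README's own example paths; the model id becomes a /data path because `volume` is mounted into the container at /data:

```console
$ export MODELDIR=/home/ubuntu/hfmodels
$ export MODELNAME="/data/models--bigscience--bloom-560m"
$ helm install tgi tgi --set volume=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME}
```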
The "volume" will be mounted to container as /data directory | +| image | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| tag | string | `"1.4"` | | +| service.port | string | `"80"` | The service port | diff --git a/helm-charts/codegen/charts/tgi/templates/NOTES.txt b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/NOTES.txt similarity index 100% rename from helm-charts/codegen/charts/tgi/templates/NOTES.txt rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/NOTES.txt diff --git a/helm-charts/codegen/charts/tgi/templates/_helpers.tpl b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/_helpers.tpl similarity index 100% rename from helm-charts/codegen/charts/tgi/templates/_helpers.tpl rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/_helpers.tpl diff --git a/helm-charts/codegen/charts/tgi/templates/deployment.yaml b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/deployment.yaml similarity index 87% rename from helm-charts/codegen/charts/tgi/templates/deployment.yaml rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/deployment.yaml index 788e3746..e5ddf058 100644 --- a/helm-charts/codegen/charts/tgi/templates/deployment.yaml +++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/deployment.yaml @@ -31,9 +31,9 @@ spec: - name: {{ .Chart.Name }} env: - name: MODEL_ID - value: {{ .Values.hftgi.modelId }} + value: {{ .Values.LLM_MODEL_ID }} - name: PORT - value: {{ .Values.hftgi.port | quote }} + value: {{ .Values.port | quote }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -43,7 +43,7 @@ spec: name: model-volume ports: - name: http - containerPort: 80 + containerPort: {{ .Values.port }} protocol: TCP # livenessProbe: # httpGet: @@ -52,17 +52,17 @@ spec: startupProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 periodSeconds: 5 failureThreshold: 120 readinessProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 - periodSeconds: 30 - failureThreshold: 20 + periodSeconds: 5 + failureThreshold: 120 resources: {{- toYaml .Values.resources | nindent 12 }} # command: @@ -70,7 +70,7 @@ spec: volumes: - name: model-volume hostPath: - path: {{ .Values.hftgi.volume }} + path: {{ .Values.volume }} type: Directory {{- with .Values.nodeSelector }} nodeSelector: diff --git a/helm-charts/codegen/charts/tgi/templates/service.yaml b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/service.yaml similarity index 90% rename from helm-charts/codegen/charts/tgi/templates/service.yaml rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/service.yaml index 5c224bbd..95f0cfa7 100644 --- a/helm-charts/codegen/charts/tgi/templates/service.yaml +++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/templates/service.yaml @@ -11,7 +11,7 @@ spec: type: {{ .Values.service.type }} ports: - port: {{ .Values.service.port }} - targetPort: {{ .Values.hftgi.port }} + targetPort: {{ .Values.port }} protocol: TCP name: tgi selector: diff --git a/helm-charts/codegen/charts/tgi/values.yaml b/helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml similarity index 91% rename from helm-charts/codegen/charts/tgi/values.yaml rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml index dcccff5c..0b9336e9 100644 --- a/helm-charts/codegen/charts/tgi/values.yaml +++ 
diff --git a/helm-charts/codegen/charts/tgi/values.yaml b/helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml
similarity index 91%
rename from helm-charts/codegen/charts/tgi/values.yaml
rename to helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml
index dcccff5c..0b9336e9 100644
--- a/helm-charts/codegen/charts/tgi/values.yaml
+++ b/helm-charts/codegen/charts/llm-uservice/charts/tgi/values.yaml
@@ -7,11 +7,10 @@
 
 replicaCount: 1
 
-hftgi:
-  modelId: bigscience/bloom-560m
-  # modelId: /data/OpenCodeInterpreter-DS-6.7B
-  port: 80
-  volume: /mnt
+LLM_MODEL_ID: bigscience/bloom-560m
+# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+port: 80
+volume: /mnt
 
 image:
   repository: ghcr.io/huggingface/text-generation-inference
diff --git a/helm-charts/codegen/charts/llm-uservice/templates/NOTES.txt b/helm-charts/codegen/charts/llm-uservice/templates/NOTES.txt
new file mode 100644
index 00000000..0e1ab0bc
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/templates/NOTES.txt
@@ -0,0 +1,16 @@
+1. Get the application URL by running these commands:
+{{- if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llm-uservice.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llm-uservice.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llm-uservice.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "llm-uservice.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
diff --git a/helm-charts/codegen/charts/llm-uservice/templates/_helpers.tpl b/helm-charts/codegen/charts/llm-uservice/templates/_helpers.tpl
new file mode 100644
index 00000000..d67db64c
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "llm-uservice.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "llm-uservice.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "llm-uservice.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "llm-uservice.labels" -}}
+helm.sh/chart: {{ include "llm-uservice.chart" . }}
+{{ include "llm-uservice.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "llm-uservice.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "llm-uservice.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "llm-uservice.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "llm-uservice.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/codegen/charts/llm-uservice/templates/deployment.yaml b/helm-charts/codegen/charts/llm-uservice/templates/deployment.yaml
new file mode 100644
index 00000000..77a99bdd
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/templates/deployment.yaml
@@ -0,0 +1,74 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "llm-uservice.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "llm-uservice.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          env:
+            - name: TGI_LLM_ENDPOINT
+              value: "http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}"
+            - name: HUGGINGFACEHUB_API_TOKEN
+              value: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: llm-uservice
+              containerPort: 9000
+              protocol: TCP
+          startupProbe:
+            exec:
+              command:
+                - curl
+                - http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+#          livenessProbe:
+#            httpGet:
+#              path: /
+#              port: 9000
+#          readinessProbe:
+#            httpGet:
+#              path: /
+#              port: 9000
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
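One way to check the TGI_LLM_ENDPOINT wiring in the deployment above is to render the chart offline. A sketch, assuming a release named `llm` and the default tgi.service.port of 80, which should print an endpoint like http://llm-tgi:80:

```console
$ helm template llm llm-uservice | grep -A 1 TGI_LLM_ENDPOINT
```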
diff --git a/helm-charts/codegen/charts/llm-uservice/templates/service.yaml b/helm-charts/codegen/charts/llm-uservice/templates/service.yaml
new file mode 100644
index 00000000..06e13b8c
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 9000
+      protocol: TCP
+      name: llm-uservice
+  selector:
+    {{- include "llm-uservice.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/codegen/charts/llm-uservice/values.yaml b/helm-charts/codegen/charts/llm-uservice/values.yaml
new file mode 100644
index 00000000..983395d7
--- /dev/null
+++ b/helm-charts/codegen/charts/llm-uservice/values.yaml
@@ -0,0 +1,69 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for llm-uservice.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+# TGI_LLM_ENDPOINT: "http://automaticallydetected"
+
+image:
+  repository: opea/gen-ai-comps:llm-tgi-server
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  # tag: "1.0"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext: {}
+  # capabilities:
+  #   drop:
+  #     - ALL
+  # readOnlyRootFilesystem: true
+  # runAsNonRoot: true
+  # runAsUser: 1000
+
+service:
+  type: ClusterIP
+  # The default port for llm service is 9000
+  port: 9000
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# To override values in subchart tgi
+tgi:
+  LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
+  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+  port: 80
+  volume: /mnt
+
+  image:
+    repository: ghcr.io/huggingface/text-generation-inference
+    pullPolicy: IfNotPresent
+    # Overrides the image tag whose default is the chart appVersion.
+    tag: "1.4"
diff --git a/helm-charts/codegen/charts/tgi/README.md b/helm-charts/codegen/charts/tgi/README.md
deleted file mode 100644
index 36a3061e..00000000
--- a/helm-charts/codegen/charts/tgi/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# tgi
-
-Helm chart for deploying Hugging Face Text Generation Inference service.
-
-## Installing the Chart
-
-To install the chart, run the following:
-
-```console
-$ export MODELDIR=/mnt
-$ export MODELNAME="bigscience/bloom-560m"
-$ helm install tgi tgi --set hftgi.volume=${MODELDIR} --set hftgi.modelId=${MODELNAME}
-```
-
-By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB.
-
-If you already cached the model locally, you can pass it to container like this example:
-
-MODELDIR=/home/ubuntu/hfmodels
-
-MODELNAME="/data/models--bigscience--bloom-560m"
-
-## Values
-
-| Key           | Type   | Default                                           | Description                                                                                                                               |
-| ------------- | ------ | ------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
-| hftgi.modelId | string | `"bigscience/bloom-560m"`                         | Models id from https://huggingface.co/, or predownloaded model directory                                                                  |
-| hftgi.port    | string | `"80"`                                            | Hugging Face Text Generation Inference service port                                                                                      |
-| hftgi.volume  | string | `"/mnt"`                                          | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory |
-| hftgi.image   | string | `"ghcr.io/huggingface/text-generation-inference"` |                                                                                                                                           |
-| hftgi.tag     | string | `"1.4"`                                           |                                                                                                                                           |
-| service.port  | string | `"80"`                                            | The service port                                                                                                                          |
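Once the pods are ready, port-forwarding the service defined above gives a quick smoke test without any ingress. A sketch, assuming a release named `llm`, for which the fullname helper yields `llm-llm-uservice`:

```console
$ kubectl get pods -l app.kubernetes.io/name=llm-uservice
$ kubectl port-forward svc/llm-llm-uservice 9000:9000
```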
diff --git a/helm-charts/codegen/templates/deployment.yaml b/helm-charts/codegen/templates/deployment.yaml
index 10ca83d3..cc7c0b35 100644
--- a/helm-charts/codegen/templates/deployment.yaml
+++ b/helm-charts/codegen/templates/deployment.yaml
@@ -30,36 +30,32 @@ spec:
      containers:
        - name: {{ .Release.Name }}
          env:
-            - name: TGI_ENDPOINT
-              value: "http://{{ .Release.Name }}-tgi"
-            - name: HUGGINGFACEHUB_API_TOKEN
-              value: {{ .Values.hfToken | quote}}
+            - name: MEGA_SERVICE_HOST_IP
+              value: {{ .Release.Name }}-llm-uservice
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          image: "{{ .Values.image.repository }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
-          command: ["/usr/local/bin/python"]
-          args: ["server.py"]
          ports:
            - name: codegen
-              containerPort: 8000
+              containerPort: {{ .Values.port }}
              protocol: TCP
-          startupProbe:
-            exec:
-              command:
-                - curl
-                - http://{{ .Release.Name }}-tgi
-            initialDelaySeconds: 5
-            periodSeconds: 5
-            failureThreshold: 120
-          livenessProbe:
-            httpGet:
-              path: /
-              port: 8000
-          readinessProbe:
-            httpGet:
-              path: /
-              port: 8000
+          # startupProbe:
+          #   httpGet:
+          #     host: {{ .Release.Name }}-llm-uservice
+          #     port: {{ index .Values "llm-uservice" "service" "port" }}
+          #     path: /
+          #   initialDelaySeconds: 5
+          #   periodSeconds: 5
+          #   failureThreshold: 120
+          # livenessProbe:
+          #   httpGet:
+          #     path: /
+          #     port: {{ .Values.port }}
+          # readinessProbe:
+          #   httpGet:
+          #     path: /
+          #     port: {{ .Values.port }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
      {{- with .Values.nodeSelector }}
diff --git a/helm-charts/codegen/templates/service.yaml b/helm-charts/codegen/templates/service.yaml
index c0655c4f..aec13682 100644
--- a/helm-charts/codegen/templates/service.yaml
+++ b/helm-charts/codegen/templates/service.yaml
@@ -11,7 +11,7 @@ spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
-      targetPort: 8000
+      targetPort: {{ .Values.port }}
      protocol: TCP
      name: codegen
  selector:
diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml
index 4a91685a..f524e55b 100644
--- a/helm-charts/codegen/values.yaml
+++ b/helm-charts/codegen/values.yaml
@@ -6,29 +6,25 @@
 # Declare variables to be passed into your templates.
 
 replicaCount: 1
-hfToken: "insert-your-huggingface-token-here"
-# tgiEndpoint: "http://automaticallydetected"
 
 image:
-  repository: intel/gen-ai-examples
+  repository: opea/gen-ai-comps:codegen-megaservice-server
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "copilot"
+  # tag: "1.0"
+
+port: 6666
 
 service:
   type: ClusterIP
-  port: 80
+  port: 6666
 
-# To override values in subchart tgi
-tgi:
-  hftgi:
-    modelId: m-a-p/OpenCodeInterpreter-DS-6.7B
-    # modelId: /data/OpenCodeInterpreter-DS-6.7B
-    port: 80
+# To override values in subchart llm-uservice
+llm-uservice:
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+  service:
+    port: 9000
+  # To override values in subchart tgi
+  tgi:
+    LLM_MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+    # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
     volume: /mnt
-
-  image:
-    repository: ghcr.io/huggingface/text-generation-inference
-    pullPolicy: IfNotPresent
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: "1.4"
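The three-level value path introduced above (codegen → llm-uservice → tgi) is easy to get wrong, so it is worth rendering once to confirm that an override actually reaches the MODEL_ID env var. A sketch, assuming the subcharts are present under codegen/charts:

```console
$ helm template codegen codegen \
    --set llm-uservice.tgi.LLM_MODEL_ID=m-a-p/OpenCodeInterpreter-DS-6.7B | grep -A 1 "name: MODEL_ID"
```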
The "volume" will be mounted to container as /data directory | +| Key | Type | Default | Description | +| ------------------------ | ------ | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/gen-ai-comps:llm-tgi-server"` | | +| service.port | string | `"9000"` | | +| tgi.LLM_MODEL_ID | string | `"m-a-p/OpenCodeInterpreter-DS-6.7B"` | Models id from https://huggingface.co/, or predownloaded model directory | +| tgi.port | string | `"80"` | Hugging Face Text Generation Inference service port | +| tgi.volume | string | `"/mnt"` | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory | diff --git a/helm-charts/common/llm-uservice/charts/tgi/README.md b/helm-charts/common/llm-uservice/charts/tgi/README.md index 36a3061e..36b3f996 100644 --- a/helm-charts/common/llm-uservice/charts/tgi/README.md +++ b/helm-charts/common/llm-uservice/charts/tgi/README.md @@ -9,7 +9,7 @@ To install the chart, run the following: ```console $ export MODELDIR=/mnt $ export MODELNAME="bigscience/bloom-560m" -$ helm install tgi tgi --set hftgi.volume=${MODELDIR} --set hftgi.modelId=${MODELNAME} +$ helm install tgi tgi --set volume=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} ``` By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB. @@ -22,11 +22,11 @@ MODELNAME="/data/models--bigscience--bloom-560m" ## Values -| Key | Type | Default | Description | -| ------------- | ------ | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | -| hftgi.modelId | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory | -| hftgi.port | string | `"80"` | Hugging Face Text Generation Inference service port | -| hftgi.volume | string | `"/mnt"` | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory | -| hftgi.image | string | `"ghcr.io/huggingface/text-generation-inference"` | | -| hftgi.tag | string | `"1.4"` | | -| service.port | string | `"80"` | The service port | +| Key | Type | Default | Description | +| ------------ | ------ | ------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory | +| port | string | `"80"` | Hugging Face Text Generation Inference service port | +| volume | string | `"/mnt"` | Cached models directory, tgi will not download if the model is cached here. 
The "volume" will be mounted to container as /data directory | +| image | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| tag | string | `"1.4"` | | +| service.port | string | `"80"` | The service port | diff --git a/helm-charts/common/llm-uservice/charts/tgi/templates/deployment.yaml b/helm-charts/common/llm-uservice/charts/tgi/templates/deployment.yaml index 788e3746..e5ddf058 100644 --- a/helm-charts/common/llm-uservice/charts/tgi/templates/deployment.yaml +++ b/helm-charts/common/llm-uservice/charts/tgi/templates/deployment.yaml @@ -31,9 +31,9 @@ spec: - name: {{ .Chart.Name }} env: - name: MODEL_ID - value: {{ .Values.hftgi.modelId }} + value: {{ .Values.LLM_MODEL_ID }} - name: PORT - value: {{ .Values.hftgi.port | quote }} + value: {{ .Values.port | quote }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -43,7 +43,7 @@ spec: name: model-volume ports: - name: http - containerPort: 80 + containerPort: {{ .Values.port }} protocol: TCP # livenessProbe: # httpGet: @@ -52,17 +52,17 @@ spec: startupProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 periodSeconds: 5 failureThreshold: 120 readinessProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 - periodSeconds: 30 - failureThreshold: 20 + periodSeconds: 5 + failureThreshold: 120 resources: {{- toYaml .Values.resources | nindent 12 }} # command: @@ -70,7 +70,7 @@ spec: volumes: - name: model-volume hostPath: - path: {{ .Values.hftgi.volume }} + path: {{ .Values.volume }} type: Directory {{- with .Values.nodeSelector }} nodeSelector: diff --git a/helm-charts/common/llm-uservice/charts/tgi/templates/service.yaml b/helm-charts/common/llm-uservice/charts/tgi/templates/service.yaml index 5c224bbd..95f0cfa7 100644 --- a/helm-charts/common/llm-uservice/charts/tgi/templates/service.yaml +++ b/helm-charts/common/llm-uservice/charts/tgi/templates/service.yaml @@ -11,7 +11,7 @@ spec: type: {{ .Values.service.type }} ports: - port: {{ .Values.service.port }} - targetPort: {{ .Values.hftgi.port }} + targetPort: {{ .Values.port }} protocol: TCP name: tgi selector: diff --git a/helm-charts/common/llm-uservice/charts/tgi/values.yaml b/helm-charts/common/llm-uservice/charts/tgi/values.yaml index dcccff5c..0b9336e9 100644 --- a/helm-charts/common/llm-uservice/charts/tgi/values.yaml +++ b/helm-charts/common/llm-uservice/charts/tgi/values.yaml @@ -7,11 +7,10 @@ replicaCount: 1 -hftgi: - modelId: bigscience/bloom-560m - # modelId: /data/OpenCodeInterpreter-DS-6.7B - port: 80 - volume: /mnt +LLM_MODEL_ID: bigscience/bloom-560m +# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B +port: 80 +volume: /mnt image: repository: ghcr.io/huggingface/text-generation-inference diff --git a/helm-charts/common/llm-uservice/templates/deployment.yaml b/helm-charts/common/llm-uservice/templates/deployment.yaml index 7b56b572..77a99bdd 100644 --- a/helm-charts/common/llm-uservice/templates/deployment.yaml +++ b/helm-charts/common/llm-uservice/templates/deployment.yaml @@ -31,9 +31,9 @@ spec: - name: {{ .Release.Name }} env: - name: TGI_LLM_ENDPOINT - value: "http://{{ .Release.Name }}-tgi" + value: "http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}" - name: HUGGINGFACEHUB_API_TOKEN - value: {{ .Values.hfToken | quote}} + value: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote}} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ 
diff --git a/helm-charts/common/llm-uservice/templates/deployment.yaml b/helm-charts/common/llm-uservice/templates/deployment.yaml
index 7b56b572..77a99bdd 100644
--- a/helm-charts/common/llm-uservice/templates/deployment.yaml
+++ b/helm-charts/common/llm-uservice/templates/deployment.yaml
@@ -31,9 +31,9 @@ spec:
        - name: {{ .Release.Name }}
          env:
            - name: TGI_LLM_ENDPOINT
-              value: "http://{{ .Release.Name }}-tgi"
+              value: "http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}"
            - name: HUGGINGFACEHUB_API_TOKEN
-              value: {{ .Values.hfToken | quote}}
+              value: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}"
@@ -46,7 +46,7 @@ spec:
            exec:
              command:
                - curl
-                - http://{{ .Release.Name }}-tgi
+                - http://{{ .Release.Name }}-tgi:{{ .Values.tgi.service.port }}
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 120
diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml
index 126981c3..983395d7 100644
--- a/helm-charts/common/llm-uservice/values.yaml
+++ b/helm-charts/common/llm-uservice/values.yaml
@@ -6,8 +6,8 @@
 # Declare variables to be passed into your templates.
 
 replicaCount: 1
-hfToken: "insert-your-huggingface-token-here"
-# tgiEndpoint: "http://automaticallydetected"
+HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+# TGI_LLM_ENDPOINT: "http://automaticallydetected"
 
 image:
   repository: opea/gen-ai-comps:llm-tgi-server
@@ -57,11 +57,10 @@ affinity: {}
 
 # To override values in subchart tgi
 tgi:
-  hftgi:
-    modelId: m-a-p/OpenCodeInterpreter-DS-6.7B
-    # modelId: /data/OpenCodeInterpreter-DS-6.7B
-    port: 80
-    volume: /mnt
+  LLM_MODEL_ID: m-a-p/OpenCodeInterpreter-DS-6.7B
+  # LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B
+  port: 80
+  volume: /mnt
 
   image:
     repository: ghcr.io/huggingface/text-generation-inference
diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md
index 36a3061e..36b3f996 100644
--- a/helm-charts/common/tgi/README.md
+++ b/helm-charts/common/tgi/README.md
@@ -9,7 +9,7 @@ To install the chart, run the following:
 ```console
 $ export MODELDIR=/mnt
 $ export MODELNAME="bigscience/bloom-560m"
-$ helm install tgi tgi --set hftgi.volume=${MODELDIR} --set hftgi.modelId=${MODELNAME}
+$ helm install tgi tgi --set volume=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME}
 ```
 
-By default, the tgi service will downloading the "bigscience/bloom-560m" which is about 1.1GB.
+By default, the tgi service will download "bigscience/bloom-560m", which is about 1.1GB.
@@ -22,11 +22,11 @@ MODELNAME="/data/models--bigscience--bloom-560m"
 
 ## Values
 
-| Key           | Type   | Default                                           | Description                                                                                                                               |
-| ------------- | ------ | ------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
-| hftgi.modelId | string | `"bigscience/bloom-560m"`                         | Models id from https://huggingface.co/, or predownloaded model directory                                                                  |
-| hftgi.port    | string | `"80"`                                            | Hugging Face Text Generation Inference service port                                                                                      |
-| hftgi.volume  | string | `"/mnt"`                                          | Cached models directory, tgi will not download if the model is cached here. The "volume" will be mounted to container as /data directory |
-| hftgi.image   | string | `"ghcr.io/huggingface/text-generation-inference"` |                                                                                                                                           |
-| hftgi.tag     | string | `"1.4"`                                           |                                                                                                                                           |
-| service.port  | string | `"80"`                                            | The service port                                                                                                                          |
+| Key          | Type   | Default                                           | Description                                                                                                                                           |
+| ------------ | ------ | ------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| LLM_MODEL_ID | string | `"bigscience/bloom-560m"`                         | Model id from https://huggingface.co/, or a predownloaded model directory                                                                             |
+| port         | string | `"80"`                                            | Hugging Face Text Generation Inference service port                                                                                                  |
+| volume       | string | `"/mnt"`                                          | Cached models directory; tgi will not download the model if it is already cached here. The "volume" directory is mounted into the container as /data |
The "volume" will be mounted to container as /data directory | +| image | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| tag | string | `"1.4"` | | +| service.port | string | `"80"` | The service port | diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml index 788e3746..e5ddf058 100644 --- a/helm-charts/common/tgi/templates/deployment.yaml +++ b/helm-charts/common/tgi/templates/deployment.yaml @@ -31,9 +31,9 @@ spec: - name: {{ .Chart.Name }} env: - name: MODEL_ID - value: {{ .Values.hftgi.modelId }} + value: {{ .Values.LLM_MODEL_ID }} - name: PORT - value: {{ .Values.hftgi.port | quote }} + value: {{ .Values.port | quote }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -43,7 +43,7 @@ spec: name: model-volume ports: - name: http - containerPort: 80 + containerPort: {{ .Values.port }} protocol: TCP # livenessProbe: # httpGet: @@ -52,17 +52,17 @@ spec: startupProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 periodSeconds: 5 failureThreshold: 120 readinessProbe: httpGet: path: / - port: http + port: {{ .Values.port }} initialDelaySeconds: 5 - periodSeconds: 30 - failureThreshold: 20 + periodSeconds: 5 + failureThreshold: 120 resources: {{- toYaml .Values.resources | nindent 12 }} # command: @@ -70,7 +70,7 @@ spec: volumes: - name: model-volume hostPath: - path: {{ .Values.hftgi.volume }} + path: {{ .Values.volume }} type: Directory {{- with .Values.nodeSelector }} nodeSelector: diff --git a/helm-charts/common/tgi/templates/service.yaml b/helm-charts/common/tgi/templates/service.yaml index 5c224bbd..95f0cfa7 100644 --- a/helm-charts/common/tgi/templates/service.yaml +++ b/helm-charts/common/tgi/templates/service.yaml @@ -11,7 +11,7 @@ spec: type: {{ .Values.service.type }} ports: - port: {{ .Values.service.port }} - targetPort: {{ .Values.hftgi.port }} + targetPort: {{ .Values.port }} protocol: TCP name: tgi selector: diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index dcccff5c..0b9336e9 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -7,11 +7,10 @@ replicaCount: 1 -hftgi: - modelId: bigscience/bloom-560m - # modelId: /data/OpenCodeInterpreter-DS-6.7B - port: 80 - volume: /mnt +LLM_MODEL_ID: bigscience/bloom-560m +# LLM_MODEL_ID: /data/OpenCodeInterpreter-DS-6.7B +port: 80 +volume: /mnt image: repository: ghcr.io/huggingface/text-generation-inference