diff --git a/helm-charts/agent/.helmignore b/helm-charts/agent/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/agent/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/agent/Chart.yaml b/helm-charts/agent/Chart.yaml new file mode 100644 index 000000000..5b261b98a --- /dev/null +++ b/helm-charts/agent/Chart.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: agent +description: The Helm chart for deploying agent microservice +type: application +version: 0-latest +# The agent microservice server version +appVersion: "v1.0" +dependencies: + - name: tgi + version: 0-latest + repository: file://../tgi + condition: tgi.enabled + - name: vllm + version: 0-latest + repository: file://../vllm + condition: vllm.enabled diff --git a/helm-charts/agent/README.md b/helm-charts/agent/README.md new file mode 100644 index 000000000..820de8951 --- /dev/null +++ b/helm-charts/agent/README.md @@ -0,0 +1,46 @@ +# agent + +Helm chart for deploying Agent microservice. + +agent depends on an LLM service; you should set llm_endpoint_url to the LLM endpoint.
+ +## Deploy + +### Use external LLM endpoint + +helm install agent oci://ghcr.io/opea-project/charts/agent --set llm_endpoint_url=${YOUR_LLM_ENDPOINT} + +### Deploy with tgi + +helm install agent oci://ghcr.io/opea-project/charts/agent --set tgi.enabled=True + +### Deploy with vllm + +helm install agent oci://ghcr.io/opea-project/charts/agent --set vllm.enabled=True + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/agent 9090:9090` to expose the agent service for access. + +Open another terminal and run the following command to verify the service is working: + +```console +curl http://localhost:9090/v1/chat/completions \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{"query":"What is OPEA?"}' +``` + +## Options + +For global options, see Global Options. + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------ | ------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/agent-langchain"` | | +| service.port | string | `"9090"` | | +| llm_endpoint_url | string | `""` | LLM endpoint | +| global.monitoring | bool | false | Service usage metrics | diff --git a/helm-charts/agent/ci-gaudi-values.yaml b/helm-charts/agent/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/agent/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/agent/gaudi-values.yaml b/helm-charts/agent/gaudi-values.yaml new file mode 100644 index 000000000..91ef5d102 --- /dev/null +++ b/helm-charts/agent/gaudi-values.yaml @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart
values + +tgi: + enabled: true + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.6" + resources: + limits: + habana.ai/gaudi: 4 + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + extraCmdArgs: ["--sharded","true","--num-shard","4"] + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/agent/templates/_helpers.tpl b/helm-charts/agent/templates/_helpers.tpl new file mode 100644 index 000000000..1227dfd81 --- /dev/null +++ b/helm-charts/agent/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "agent.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "agent.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "agent.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "agent.labels" -}} +helm.sh/chart: {{ include "agent.chart" . }} +{{ include "agent.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "agent.selectorLabels" -}} +app.kubernetes.io/name: {{ include "agent.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "agent.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "agent.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/agent/templates/configmap.yaml b/helm-charts/agent/templates/configmap.yaml new file mode 100644 index 000000000..62970d7d2 --- /dev/null +++ b/helm-charts/agent/templates/configmap.yaml @@ -0,0 +1,66 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "agent.fullname" . }}-config + labels: + {{- include "agent.labels" . 
| nindent 4 }} +data: + {{- if .Values.tools }} + tools: {{ .Values.tools | quote }} + {{- end }} + {{- if .Values.llm_endpoint_url }} + llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }} + {{- else }} + llm_endpoint_url: "http://{{ .Release.Name }}-tgi" + {{- end }} + # {{- if .Values.port }} + # port: {{ .Values.port | quote }} + # {{- end }} + {{- if .Values.model }} + model: {{ .Values.model | quote }} + {{- end }} + {{- if .Values.streaming }} + streaming: {{ .Values.streaming | quote }} + {{- end }} + {{- if .Values.temperature }} + temperature: {{ .Values.temperature | quote }} + {{- end }} + {{- if .Values.RETRIEVAL_TOOL_URL }} + RETRIEVAL_TOOL_URL: {{ .Values.RETRIEVAL_TOOL_URL | quote }} + {{- else }} + RETRIEVAL_TOOL_URL: "http://{{ .Release.Name }}-docretriever:8889/v1/retrievaltool" + {{- end }} + {{- if .Values.CRAG_SERVER }} + CRAG_SERVER: {{ .Values.CRAG_SERVER | quote }} + {{- else }} + CRAG_SERVER: "http://{{ .Release.Name }}-crag:8080" + {{- end }} + {{- if .Values.WORKER_AGENT_URL }} + WORKER_AGENT_URL: {{ .Values.WORKER_AGENT_URL | quote }} + {{- else }} + WORKER_AGENT_URL: "http://{{ .Release.Name }}-worker:9095/v1/chat/completions" + {{- end }} + require_human_feedback: {{ .Values.require_human_feedback | quote }} + recursion_limit: {{ .Values.recursion_limit | quote }} + llm_engine: {{ .Values.llm_engine | quote }} + strategy: {{ .Values.strategy | quote }} + max_new_tokens: {{ .Values.max_new_tokens | quote }} + {{- if .Values.OPENAI_API_KEY }} + OPENAI_API_KEY: {{ .Values.OPENAI_API_KEY | quote }} + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- /* Bypass the proxy for the bundled tgi service only when no external llm_endpoint_url is set. */ -}} {{- if and (not .Values.llm_endpoint_url) (or .Values.global.http_proxy
.Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/agent/templates/deployment.yaml b/helm-charts/agent/templates/deployment.yaml new file mode 100644 index 000000000..12c0e00a2 --- /dev/null +++ b/helm-charts/agent/templates/deployment.yaml @@ -0,0 +1,100 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "agent.fullname" . }} + labels: + {{- include "agent.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "agent.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "agent.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "agent.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: agent + containerPort: 9090 + protocol: TCP + volumeMounts: + {{- if .Values.toolPath }} + - mountPath: /home/user/tools + name: tool + {{- end }} + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + {{- if .Values.toolPath }} + - name: tool + hostPath: + path: {{ .Values.toolPath }} + type: Directory + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "agent.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/agent/templates/service.yaml b/helm-charts/agent/templates/service.yaml new file mode 100644 index 000000000..00d34de8d --- /dev/null +++ b/helm-charts/agent/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "agent.fullname" . }} + labels: + {{- include "agent.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9090 + protocol: TCP + name: agent + selector: + {{- include "agent.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/agent/templates/servicemonitor.yaml b/helm-charts/agent/templates/servicemonitor.yaml new file mode 100644 index 000000000..0eaae8763 --- /dev/null +++ b/helm-charts/agent/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "agent.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "agent.selectorLabels" . | nindent 6 }} + endpoints: + - port: agent + interval: 5s +{{- end }} diff --git a/helm-charts/agent/templates/tests/test-pod.yaml b/helm-charts/agent/templates/tests/test-pod.yaml new file mode 100644 index 000000000..61515c7a3 --- /dev/null +++ b/helm-charts/agent/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "agent.fullname" . }}-testpod" + labels: + {{- include "agent.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{"query":"What is OPEA?"}' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/agent/values.yaml b/helm-charts/agent/values.yaml new file mode 100644 index 000000000..9d7b236d1 --- /dev/null +++ b/helm-charts/agent/values.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for agent. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +tgi: + enabled: false + LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct" + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + +vllm: + enabled: false + LLM_MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3" + extraCmdArgs: ["/bin/bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model mistralai/Mistral-7B-Instruct-v0.3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 4096 --max-seq_len-to-capture 8192 --enable-auto-tool-choice --tool-call-parser mistral"] + +replicaCount: 1 +llm_endpoint_url: "" +model: "meta-llama/Meta-Llama-3.1-70B-Instruct" +max_new_tokens: "4096" +llm_engine: "tgi" +strategy: "react_langchain" +recursion_limit: "15" +require_human_feedback: "false" + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "True" + +image: + repository: opea/agent-langchain + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for agent service is 9090 + port: 9090 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: agent + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: agent + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: agent + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/asr/.helmignore b/helm-charts/asr/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/asr/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/asr/Chart.yaml b/helm-charts/asr/Chart.yaml new file mode 100644 index 000000000..90253b164 --- /dev/null +++ b/helm-charts/asr/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: asr +description: The Helm chart for deploying asr as microservice +type: application +version: 0-latest +# The asr microservice server version +appVersion: "v1.0" +dependencies: + - name: whisper + version: 0-latest + repository: file://../whisper + condition: whisper.enabled diff --git a/helm-charts/asr/README.md b/helm-charts/asr/README.md new file mode 100644 index 000000000..a0c131936 --- /dev/null +++ b/helm-charts/asr/README.md @@ -0,0 +1,51 @@ +# asr + +Helm chart for deploying asr microservice. + +asr depends on whisper; you should set the ASR_ENDPOINT endpoint before starting. + +## (Option1): Installing the chart separately + +First, you need to install the whisper chart, please refer to the [whisper](../whisper/README.md) chart for more information. + +After you've deployed the whisper chart successfully, please run `kubectl get svc` to get the whisper service endpoint, e.g. `http://whisper:7066`. + +To install the asr chart, run the following: + +```console +cd GenAIInfra/helm-charts/asr +export ASR_ENDPOINT="http://whisper:7066" +helm dependency update +helm install asr . --set ASR_ENDPOINT=${ASR_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/asr +helm dependency update +helm install asr . --set whisper.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+ +Then run the command `kubectl port-forward svc/asr 9099:9099` to expose the asr service for access. + +Open another terminal and run the following command to verify the service is working: + +```console +curl http://localhost:9099/v1/audio/transcriptions \ + -XPOST \ + -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ------------ | ----------- | +| image.repository | string | `"opea/asr"` | | +| service.port | string | `"9099"` | | +| ASR_ENDPOINT | string | `""` | | diff --git a/helm-charts/asr/ci-values.yaml b/helm-charts/asr/ci-values.yaml new file mode 100644 index 000000000..b0f302d8b --- /dev/null +++ b/helm-charts/asr/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for asr. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +whisper: + enabled: true diff --git a/helm-charts/asr/templates/_helpers.tpl b/helm-charts/asr/templates/_helpers.tpl new file mode 100644 index 000000000..62d5a0554 --- /dev/null +++ b/helm-charts/asr/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "asr.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name.
+*/}} +{{- define "asr.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "asr.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "asr.labels" -}} +helm.sh/chart: {{ include "asr.chart" . }} +{{ include "asr.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "asr.selectorLabels" -}} +app.kubernetes.io/name: {{ include "asr.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "asr.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "asr.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/asr/templates/configmap.yaml b/helm-charts/asr/templates/configmap.yaml new file mode 100644 index 000000000..965e98fc2 --- /dev/null +++ b/helm-charts/asr/templates/configmap.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "asr.fullname" . }}-config + labels: + {{- include "asr.labels" . 
| nindent 4 }} +data: + {{- if .Values.ASR_ENDPOINT }} + ASR_ENDPOINT: {{ .Values.ASR_ENDPOINT | quote}} + {{- else }} + ASR_ENDPOINT: "http://{{ .Release.Name }}-whisper:7066" + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.ASR_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-whisper,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/asr/templates/deployment.yaml b/helm-charts/asr/templates/deployment.yaml new file mode 100644 index 000000000..309ff56ca --- /dev/null +++ b/helm-charts/asr/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "asr.fullname" . }} + labels: + {{- include "asr.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "asr.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "asr.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "asr.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: asr + containerPort: 9099 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "asr.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/asr/templates/service.yaml b/helm-charts/asr/templates/service.yaml new file mode 100644 index 000000000..dfa3e5b41 --- /dev/null +++ b/helm-charts/asr/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "asr.fullname" . }} + labels: + {{- include "asr.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9099 + protocol: TCP + name: asr + selector: + {{- include "asr.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/asr/templates/tests/test-pod.yaml b/helm-charts/asr/templates/tests/test-pod.yaml new file mode 100644 index 000000000..d71fbf9f7 --- /dev/null +++ b/helm-charts/asr/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "asr.fullname" . }}-testpod" + labels: + {{- include "asr.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "asr.fullname" . }}:{{ .Values.service.port }}/v1/audio/transcriptions \ + -XPOST \ + -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/asr/values.yaml b/helm-charts/asr/values.yaml new file mode 100644 index 000000000..92728e644 --- /dev/null +++ b/helm-charts/asr/values.yaml @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for asr. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +whisper: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. 
+LOGFLAG: "" + +ASR_ENDPOINT: "" + +image: + repository: opea/asr + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for asr service is 9099 + port: 9099 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: asr + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: asr + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: asr + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/chathistory-usvc/.helmignore b/helm-charts/chathistory-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/chathistory-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. 
+# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/chathistory-usvc/Chart.yaml b/helm-charts/chathistory-usvc/Chart.yaml new file mode 100644 index 000000000..f7a4cf8ba --- /dev/null +++ b/helm-charts/chathistory-usvc/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: chathistory-usvc +description: The Helm chart for deploying chat history as microservice +type: application +version: 0-latest +# The chat history microservice server version +appVersion: "v1.0" +dependencies: + - name: mongodb + version: 0-latest + repository: file://../mongodb + condition: mongodb.enabled diff --git a/helm-charts/chathistory-usvc/README.md b/helm-charts/chathistory-usvc/README.md new file mode 100644 index 000000000..171e3b93c --- /dev/null +++ b/helm-charts/chathistory-usvc/README.md @@ -0,0 +1,54 @@ +# chathistory-usvc + +Helm chart for deploying chathistory-usvc microservice. + +chathistory-usvc depends on the mongodb service; please specify its host and port. + +## (Option1): Installing the chart separately + +First, you need to install the mongodb chart, please refer to the [mongodb](../mongodb) for more information. + +After you've deployed the mongodb chart successfully, run `kubectl get svc` to get the service host and port, e.g. `mongodb:27017`. + +To install chathistory-usvc chart, run the following: + +```console +cd GenAIInfra/helm-charts/chathistory-usvc +export MONGO_HOST="mongodb" +export MONGO_PORT="27017" +helm dependency update +helm install chathistory-usvc .
--set MONGO_HOST=${MONGO_HOST} --set MONGO_PORT=${MONGO_PORT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/chathistory-usvc +helm dependency update +helm install chathistory-usvc . --set mongodb.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/chathistory-usvc 6012:6012` to expose the chathistory-usvc service for access. + +Open another terminal and run the following command to verify the service is working: + +```console +curl -X 'POST' \ + http://localhost:6012/v1/chathistory/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{"data": {"messages": "test Messages", "user": "test"}}' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | --------------------------------- | ----------- | +| image.repository | string | `"opea/chathistory-mongo-server"` | | +| service.port | string | `"6012"` | | +| MONGO_HOST | string | `""` | | +| MONGO_PORT | string | `""` | | diff --git a/helm-charts/chathistory-usvc/ci-values.yaml b/helm-charts/chathistory-usvc/ci-values.yaml new file mode 100644 index 000000000..b2ce309f3 --- /dev/null +++ b/helm-charts/chathistory-usvc/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for chathistory-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +mongodb: + enabled: true diff --git a/helm-charts/chathistory-usvc/templates/_helpers.tpl b/helm-charts/chathistory-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..6442c29ee --- /dev/null +++ b/helm-charts/chathistory-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart.
+*/}} +{{- define "chathistory-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "chathistory-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "chathistory-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "chathistory-usvc.labels" -}} +helm.sh/chart: {{ include "chathistory-usvc.chart" . }} +{{ include "chathistory-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "chathistory-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chathistory-usvc.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "chathistory-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "chathistory-usvc.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/chathistory-usvc/templates/configmap.yaml b/helm-charts/chathistory-usvc/templates/configmap.yaml new file mode 100644 index 000000000..14149d592 --- /dev/null +++ b/helm-charts/chathistory-usvc/templates/configmap.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "chathistory-usvc.fullname" . }}-config + labels: + {{- include "chathistory-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.MONGO_HOST }} + MONGO_HOST: {{ .Values.MONGO_HOST | quote}} + {{- else }} + MONGO_HOST: "{{ .Release.Name }}-mongodb" + {{- end }} + {{- if .Values.MONGO_PORT }} + MONGO_PORT: {{ .Values.MONGO_PORT | quote }} + {{- else }} + MONGO_PORT: "27017" + {{- end }} + DB_NAME: {{ .Values.DB_NAME | quote }} + COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.MONGO_HOST) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-mongodb,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/chathistory-usvc/templates/deployment.yaml b/helm-charts/chathistory-usvc/templates/deployment.yaml new file mode 100644 index 000000000..b721318e6 --- /dev/null +++ b/helm-charts/chathistory-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chathistory-usvc.fullname" . }} + labels: + {{- include "chathistory-usvc.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "chathistory-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "chathistory-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "chathistory-usvc.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: port + containerPort: 6012 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "chathistory-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/chathistory-usvc/templates/service.yaml b/helm-charts/chathistory-usvc/templates/service.yaml new file mode 100644 index 000000000..d6cae5a99 --- /dev/null +++ b/helm-charts/chathistory-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chathistory-usvc.fullname" . }} + labels: + {{- include "chathistory-usvc.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 6012 + protocol: TCP + name: port + selector: + {{- include "chathistory-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml b/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..c0193e01a --- /dev/null +++ b/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "chathistory-usvc.fullname" . }}-testpod + labels: + {{- include "chathistory-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + set -x + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -X 'POST' \ + http://{{ include "chathistory-usvc.fullname" . 
}}:{{ .Values.service.port }}/v1/chathistory/create -sS --fail-with-body \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{"data": {"messages": "test Messages", "user": "test"}}' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/chathistory-usvc/values.yaml b/helm-charts/chathistory-usvc/values.yaml new file mode 100644 index 000000000..d5f2faa92 --- /dev/null +++ b/helm-charts/chathistory-usvc/values.yaml @@ -0,0 +1,96 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for chathistory-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +mongodb: + enabled: false + +replicaCount: 1 + +image: + repository: opea/chathistory-mongo-server + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 6012 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +# MongoDB info +MONGO_HOST: "" +MONGO_PORT: "" +DB_NAME: "OPEA" +COLLECTION_NAME: "ChatHistory" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/data-prep/.helmignore b/helm-charts/data-prep/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/data-prep/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/data-prep/Chart.yaml b/helm-charts/data-prep/Chart.yaml new file mode 100644 index 000000000..80f5809c1 --- /dev/null +++ b/helm-charts/data-prep/Chart.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: data-prep +description: The Helm chart for deploying data prep as microservice +type: application +version: 0-latest +# The data prep microservice server version +appVersion: "v1.0" +dependencies: + - name: tei + version: 0-latest + repository: file://../tei + condition: tei.enabled + - name: redis-vector-db + version: 0-latest + repository: file://../redis-vector-db + condition: redis-vector-db.enabled + - name: milvus + version: 4.2.12 + repository: https://zilliztech.github.io/milvus-helm/ + condition: milvus.enabled diff --git a/helm-charts/data-prep/README.md b/helm-charts/data-prep/README.md new file mode 100644 index 000000000..4a05e2f34 --- /dev/null +++ b/helm-charts/data-prep/README.md @@ -0,0 +1,58 @@ +# data-prep + +Helm chart for deploying data-prep microservice. + +data-prep will use redis and tei service, please specify the endpoints. + +## (Option1): Installing the chart separately + +First, you need to install the tei and redis-vector-db charts, please refer to the [tei](../tei/README.md) and [redis-vector-db](../redis-vector-db/README.md) for more information. + +After you've deployed the tei and redis-vector-db charts successfully, please run `kubectl get svc` to get the service endpoint and URL respectively, i.e. `http://tei`, `redis://redis-vector-db:6379`.
+ +To install data-prep chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/data-prep +export REDIS_URL="redis://redis-vector-db:6379" +export TEI_EMBEDDING_ENDPOINT="http://tei" +helm dependency update +helm install data-prep . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/data-prep +helm dependency update +helm install data-prep . --set redis-vector-db.enabled=true --set tei.enabled=true + +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/data-prep 6007:6007` to expose the data-prep service for access. + +Open another terminal and run the following command to verify the service is working: + +```console +curl http://localhost:6007/v1/dataprep \ + -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./README.md" +``` + +## Values + +| Key | Type | Default | Description | +| ---------------------- | ------ | ----------------------- | ----------- | +| image.repository | string | `"opea/dataprep-redis"` | | +| service.port | string | `"6007"` | | +| REDIS_URL | string | `""` | | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | + +## Milvus support + +Refer to the milvus-values.yaml for milvus configurations. diff --git a/helm-charts/data-prep/ci-values.yaml b/helm-charts/data-prep/ci-values.yaml new file mode 100644 index 000000000..473698ec0 --- /dev/null +++ b/helm-charts/data-prep/ci-values.yaml @@ -0,0 +1,13 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for data-prep. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +tei: + enabled: true +redis-vector-db: + enabled: true +milvus: + enabled: false diff --git a/helm-charts/data-prep/milvus-values.yaml b/helm-charts/data-prep/milvus-values.yaml new file mode 100644 index 000000000..3a3cb76b7 --- /dev/null +++ b/helm-charts/data-prep/milvus-values.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for data-prep. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +milvus: + enabled: true + cluster: + enabled: false + etcd: + replicaCount: 1 + pulsar: + enabled: false + minio: + mode: standalone +redis-vector-db: + enabled: false +tei: + enabled: true + +image: + repository: opea/dataprep-milvus + +port: 6010 +# text embedding inference service URL, e.g. http://<service-name>:<port> +#TEI_EMBEDDING_ENDPOINT: "http://embedding-tei:80" +# milvus DB configurations +#MILVUS_HOST: "milvustest" +MILVUS_PORT: "19530" +COLLECTION_NAME: "rag_milvus" +MOSEC_EMBEDDING_ENDPOINT: "" +MOSEC_EMBEDDING_MODEL: "" diff --git a/helm-charts/data-prep/templates/_helpers.tpl b/helm-charts/data-prep/templates/_helpers.tpl new file mode 100644 index 000000000..8d2062396 --- /dev/null +++ b/helm-charts/data-prep/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "data-prep.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name.
+*/}} +{{- define "data-prep.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "data-prep.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "data-prep.labels" -}} +helm.sh/chart: {{ include "data-prep.chart" . }} +{{ include "data-prep.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "data-prep.selectorLabels" -}} +app.kubernetes.io/name: {{ include "data-prep.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "data-prep.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "data-prep.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/data-prep/templates/configmap.yaml b/helm-charts/data-prep/templates/configmap.yaml new file mode 100644 index 000000000..e0306c0b1 --- /dev/null +++ b/helm-charts/data-prep/templates/configmap.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "data-prep.fullname" . }}-config + labels: + {{- include "data-prep.labels" . 
| nindent 4 }} +data: + {{- if .Values.MOSEC_EMBEDDING_ENDPOINT }} + MOSEC_EMBEDDING_ENDPOINT: {{ .Values.MOSEC_EMBEDDING_ENDPOINT | quote}} + MOSEC_EMBEDDING_MODEL: {{ .Values.MOSEC_EMBEDDING_MODEL | quote}} + {{- else if .Values.TEI_EMBEDDING_ENDPOINT }} + TEI_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}} + TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}} + {{- else if not .Values.LOCAL_EMBEDDING_MODEL }} + TEI_ENDPOINT: "http://{{ .Release.Name }}-tei" + {{- end }} + {{- if .Values.LOCAL_EMBEDDING_MODEL }} + EMBED_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }} + LOCAL_EMBEDDING_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }} + {{- end }} + {{- if .Values.REDIS_URL }} + REDIS_URL: {{ .Values.REDIS_URL | quote}} + {{- else }} + REDIS_URL: "redis://{{ .Release.Name }}-redis-vector-db:6379" + {{- end }} + INDEX_NAME: {{ .Values.INDEX_NAME | quote }} + KEY_INDEX_NAME: {{ .Values.KEY_INDEX_NAME | quote }} + SEARCH_BATCH_SIZE: {{ .Values.SEARCH_BATCH_SIZE | quote }} + {{- if .Values.MILVUS_HOST }} + MILVUS_HOST: {{ .Values.MILVUS_HOST | quote }} + {{- else }} + MILVUS_HOST: "{{ .Release.Name }}-milvus" + {{- end }} + MILVUS: {{ .Values.MILVUS_HOST | quote }} + MILVUS_PORT: {{ .Values.MILVUS_PORT | quote }} + {{- if .Values.COLLECTION_NAME }} + COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }} + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.REDIS_URL) (and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy)) }} + no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | 
quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/data-prep/templates/deployment.yaml b/helm-charts/data-prep/templates/deployment.yaml new file mode 100644 index 000000000..30faff173 --- /dev/null +++ b/helm-charts/data-prep/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "data-prep.fullname" . }} + labels: + {{- include "data-prep.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "data-prep.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "data-prep.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "data-prep.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: data-prep + containerPort: {{ .Values.port }} + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "data-prep.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/data-prep/templates/service.yaml b/helm-charts/data-prep/templates/service.yaml new file mode 100644 index 000000000..afeff3ecf --- /dev/null +++ b/helm-charts/data-prep/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "data-prep.fullname" . }} + labels: + {{- include "data-prep.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.port }} + protocol: TCP + name: data-prep + selector: + {{- include "data-prep.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/data-prep/templates/tests/test-pod.yaml b/helm-charts/data-prep/templates/tests/test-pod.yaml new file mode 100644 index 000000000..127fa1167 --- /dev/null +++ b/helm-charts/data-prep/templates/tests/test-pod.yaml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "data-prep.fullname" . }}-testpod + labels: + {{- include "data-prep.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + echo "test file" > /tmp/file1.txt; + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep -sS --fail-with-body \ + -X POST \ + -H "Content-Type: multipart/form-data" \ + -F "files=@/tmp/file1.txt" && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep/delete_file -sS \ + -X POST \ + -H "Content-Type: application/json" \ + -d '{"file_path": "file1.txt"}'; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/data-prep/values.yaml b/helm-charts/data-prep/values.yaml new file mode 100644 index 000000000..e35274fcb --- /dev/null +++ b/helm-charts/data-prep/values.yaml @@ -0,0 +1,115 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for data-prep. 
+# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: false +milvus: + enabled: false +redis-vector-db: + enabled: false + +replicaCount: 1 + +image: + repository: opea/dataprep-redis + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +port: 6007 +service: + type: ClusterIP + port: 6007 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: data-prep + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +# text embedding inference service URL, e.g. 
http://<service-name>:<port> +TEI_EMBEDDING_ENDPOINT: "" + +# local embedder's model +LOCAL_EMBEDDING_MODEL: "" + +# redis DB service URL, e.g. redis://<service-name>:<port> +REDIS_URL: "" +INDEX_NAME: "rag-redis" +KEY_INDEX_NAME: "file-keys" +SEARCH_BATCH_SIZE: 10 + +# milvus DB configurations +MILVUS_HOST: "" +MILVUS_PORT: "" +COLLECTION_NAME: "" +MOSEC_EMBEDDING_ENDPOINT: "" +MOSEC_EMBEDDING_MODEL: "" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" diff --git a/helm-charts/embedding-usvc/.helmignore b/helm-charts/embedding-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/embedding-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/embedding-usvc/Chart.yaml b/helm-charts/embedding-usvc/Chart.yaml new file mode 100644 index 000000000..7edaba721 --- /dev/null +++ b/helm-charts/embedding-usvc/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: embedding-usvc +description: The Helm chart for deploying embedding as microservice +type: application +version: 0-latest +# The embedding microservice server version +appVersion: "v1.0" +dependencies: + - name: tei + version: 0-latest + repository: file://../tei + condition: tei.enabled diff --git a/helm-charts/embedding-usvc/README.md b/helm-charts/embedding-usvc/README.md new file mode 100644 index 000000000..2bc0ed4bd --- /dev/null +++ b/helm-charts/embedding-usvc/README.md @@ -0,0 +1,52 @@ +# embedding-usvc + +Helm chart for deploying embedding microservice.
+ +embedding-usvc depends on TEI, set TEI_EMBEDDING_ENDPOINT. + +## (Option1): Installing the chart separately + +First, you need to install the tei chart, please refer to the [tei](../tei) chart for more information. + +After you've deployed the tei chart successfully, please run `kubectl get svc` to get the tei service endpoint, i.e. `http://tei`. + +To install the embedding-usvc chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/embedding-usvc +export TEI_EMBEDDING_ENDPOINT="http://tei" +helm dependency update +helm install embedding-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/embedding-usvc +helm dependency update +helm install embedding-usvc . --set tei.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/embedding-usvc 6000:6000` to expose the embedding-usvc service for access. + +Open another terminal and run the following command to verify the service is working: + +```console +curl http://localhost:6000/v1/embeddings \ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------------- | ------ | ---------------------- | ----------- | +| image.repository | string | `"opea/embedding-tei"` | | +| service.port | string | `"6000"` | | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | +| global.monitoring | bool | `false` | | diff --git a/helm-charts/embedding-usvc/ci-values.yaml b/helm-charts/embedding-usvc/ci-values.yaml new file mode 100644 index 000000000..543c69570 --- /dev/null +++ b/helm-charts/embedding-usvc/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for embedding-usvc.
+# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: true diff --git a/helm-charts/embedding-usvc/templates/_helpers.tpl b/helm-charts/embedding-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..229f1a5b5 --- /dev/null +++ b/helm-charts/embedding-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "embedding-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "embedding-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "embedding-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "embedding-usvc.labels" -}} +helm.sh/chart: {{ include "embedding-usvc.chart" . }} +{{ include "embedding-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "embedding-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "embedding-usvc.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use: serviceAccount.name when set, otherwise the chart fullname if serviceAccount.create is true, or "default" if it is not +*/}} +{{- define "embedding-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "embedding-usvc.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/embedding-usvc/templates/configmap.yaml b/helm-charts/embedding-usvc/templates/configmap.yaml new file mode 100644 index 000000000..5ec5904ad --- /dev/null +++ b/helm-charts/embedding-usvc/templates/configmap.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "embedding-usvc.fullname" . }}-config + labels: + {{- include "embedding-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.TEI_EMBEDDING_ENDPOINT }} + TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote }} + {{- else }} + TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei" + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tei,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/embedding-usvc/templates/deployment.yaml b/helm-charts/embedding-usvc/templates/deployment.yaml new file mode 100644 index 000000000..67b2fae99 --- /dev/null +++ b/helm-charts/embedding-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "embedding-usvc.fullname" . }} + labels: + {{- include "embedding-usvc.labels" .
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "embedding-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "embedding-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "embedding-usvc.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: embedding-usvc + containerPort: 6000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "embedding-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/embedding-usvc/templates/service.yaml b/helm-charts/embedding-usvc/templates/service.yaml new file mode 100644 index 000000000..2aff873f2 --- /dev/null +++ b/helm-charts/embedding-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "embedding-usvc.fullname" . }} + labels: + {{- include "embedding-usvc.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 6000 + protocol: TCP + name: embedding-usvc + selector: + {{- include "embedding-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/embedding-usvc/templates/servicemonitor.yaml b/helm-charts/embedding-usvc/templates/servicemonitor.yaml new file mode 100644 index 000000000..ea26f6cc2 --- /dev/null +++ b/helm-charts/embedding-usvc/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "embedding-usvc.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "embedding-usvc.selectorLabels" . 
| nindent 6 }} + endpoints: + - port: embedding-usvc + interval: 5s +{{- end }} diff --git a/helm-charts/embedding-usvc/templates/tests/test-pod.yaml b/helm-charts/embedding-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..7acc267b3 --- /dev/null +++ b/helm-charts/embedding-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "embedding-usvc.fullname" . }}-testpod" + labels: + {{- include "embedding-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "embedding-usvc.fullname" . }}:{{ .Values.service.port }}/v1/embeddings -sS --fail-with-body \ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/embedding-usvc/values.yaml b/helm-charts/embedding-usvc/values.yaml new file mode 100644 index 000000000..66f79d789 --- /dev/null +++ b/helm-charts/embedding-usvc/values.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for embedding-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. 
+LOGFLAG: "" + +TEI_EMBEDDING_ENDPOINT: "" +image: + repository: opea/embedding-tei + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for embedding service is 6000 + port: 6000 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: embedding-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/gpt-sovits/.helmignore b/helm-charts/gpt-sovits/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/gpt-sovits/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/gpt-sovits/Chart.yaml b/helm-charts/gpt-sovits/Chart.yaml new file mode 100644 index 000000000..65d83a9dc --- /dev/null +++ b/helm-charts/gpt-sovits/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: gpt-sovits +description: The Helm chart for deploying gpt-sovits as microservice +type: application +version: 0-latest +# The gpt-sovits microservice server version +appVersion: "1.0" diff --git a/helm-charts/gpt-sovits/README.md b/helm-charts/gpt-sovits/README.md new file mode 100644 index 000000000..9f8cdd7ab --- /dev/null +++ b/helm-charts/gpt-sovits/README.md @@ -0,0 +1,44 @@ +# gpt-sovits + +Helm chart for deploying gpt-sovits microservice. + +## Install the chart + +```console +cd GenAIInfra/helm-charts/common/ +helm install gpt-sovits gpt-sovits +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/gpt-sovits 9880:9880` to expose the gpt-sovits service for access. + +Open another terminal and run the following command to verify the service if working: + +- Chinese only + +```bash +curl localhost:9880/ -XPOST -d '{ + "text": "先帝创业未半而中道崩殂,今天下三分,益州疲弊,此诚危急存亡之秋也。", + "text_language": "zh" +}' --output out.wav +``` + +- English only + +```bash +curl localhost:9880/ -XPOST -d '{ + "text": "Discuss the evolution of text-to-speech (TTS) technology from its early beginnings to the present day. Highlight the advancements in natural language processing that have contributed to more realistic and human-like speech synthesis. Also, explore the various applications of TTS in education, accessibility, and customer service, and predict future trends in this field. 
Write a comprehensive overview of text-to-speech (TTS) technology.", + "text_language": "en" +}' --output out.wav +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ------------------- | ----------- | +| image.repository | string | `"opea/gpt-sovits"` | | +| service.port | string | `"9880"` | | +| TTS_ENDPOINT | string | `""` | | diff --git a/helm-charts/gpt-sovits/ci-values.yaml b/helm-charts/gpt-sovits/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/gpt-sovits/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/gpt-sovits/templates/_helpers.tpl b/helm-charts/gpt-sovits/templates/_helpers.tpl new file mode 100644 index 000000000..f42c32caa --- /dev/null +++ b/helm-charts/gpt-sovits/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "gpt-sovits.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "gpt-sovits.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "gpt-sovits.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "gpt-sovits.labels" -}} +helm.sh/chart: {{ include "gpt-sovits.chart" . }} +{{ include "gpt-sovits.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "gpt-sovits.selectorLabels" -}} +app.kubernetes.io/name: {{ include "gpt-sovits.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "gpt-sovits.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "gpt-sovits.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/gpt-sovits/templates/configmap.yaml b/helm-charts/gpt-sovits/templates/configmap.yaml new file mode 100644 index 000000000..898f0bfe4 --- /dev/null +++ b/helm-charts/gpt-sovits/templates/configmap.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "gpt-sovits.fullname" . }}-config + labels: + {{- include "gpt-sovits.labels" . 
| nindent 4 }} +data: + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + NUMBA_CACHE_DIR: "/tmp/.cache/numba" + HF_HOME: "/tmp/.cache/huggingface" + XDG_CACHE_HOME: "/tmp/.cache" diff --git a/helm-charts/gpt-sovits/templates/deployment.yaml b/helm-charts/gpt-sovits/templates/deployment.yaml new file mode 100644 index 000000000..d5d6e9d90 --- /dev/null +++ b/helm-charts/gpt-sovits/templates/deployment.yaml @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "gpt-sovits.fullname" . }} + labels: + {{- include "gpt-sovits.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "gpt-sovits.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gpt-sovits.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "gpt-sovits.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: gpt-sovits + containerPort: 9880 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "gpt-sovits.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/gpt-sovits/templates/service.yaml b/helm-charts/gpt-sovits/templates/service.yaml new file mode 100644 index 000000000..ba55efb36 --- /dev/null +++ b/helm-charts/gpt-sovits/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "gpt-sovits.fullname" . 
}} + labels: + {{- include "gpt-sovits.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9880 + protocol: TCP + name: gpt-sovits + selector: + {{- include "gpt-sovits.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/gpt-sovits/templates/tests/test-pod.yaml b/helm-charts/gpt-sovits/templates/tests/test-pod.yaml new file mode 100644 index 000000000..4d13086b4 --- /dev/null +++ b/helm-charts/gpt-sovits/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "gpt-sovits.fullname" . }}-testpod" + labels: + {{- include "gpt-sovits.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "gpt-sovits.fullname" . }}:{{ .Values.service.port }}/ \ + -X POST \ + -d '{"text": "Discuss the evolution of text-to-speech.", "text_language": "en"}' \ + --output /tmp/out.wav && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/gpt-sovits/values.yaml b/helm-charts/gpt-sovits/values.yaml new file mode 100644 index 000000000..d5cff30d7 --- /dev/null +++ b/helm-charts/gpt-sovits/values.yaml @@ -0,0 +1,83 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for gpt-sovits. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +replicaCount: 1 + +image: + repository: opea/gpt-sovits + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 9880 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + tcpSocket: + port: gpt-sovits + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 4 + failureThreshold: 24 +readinessProbe: + tcpSocket: + port: gpt-sovits + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 4 +startupProbe: + tcpSocket: + port: gpt-sovits + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 120 + timeoutSeconds: 2 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/guardrails-usvc/.helmignore b/helm-charts/guardrails-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/guardrails-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). 
Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/guardrails-usvc/Chart.yaml b/helm-charts/guardrails-usvc/Chart.yaml new file mode 100644 index 000000000..1f229fe41 --- /dev/null +++ b/helm-charts/guardrails-usvc/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: guardrails-usvc +description: The Helm chart for deploying guardrails-usvc as microservice +type: application +version: 0-latest +appVersion: "v1.0" +dependencies: + - name: tgi + version: 0-latest + alias: tgi-guardrails + repository: file://../tgi + condition: tgi-guardrails.enabled diff --git a/helm-charts/guardrails-usvc/README.md b/helm-charts/guardrails-usvc/README.md new file mode 100644 index 000000000..b5656d04d --- /dev/null +++ b/helm-charts/guardrails-usvc/README.md @@ -0,0 +1,57 @@ +# guardrails-usvc + +Helm chart for deploying the guardrails microservice. + +guardrails-usvc depends on TGI; you should set SAFETY_GUARD_ENDPOINT to the tgi endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. Please use model `meta-llama/Meta-Llama-Guard-2-8B` during installation. + +After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`. + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/guardrails-usvc +export HFTOKEN="insert-your-huggingface-token-here" +export SAFETY_GUARD_ENDPOINT="http://tgi" +export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" +helm dependency update +helm install guardrails-usvc .
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --wait +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/guardrails-usvc +export HFTOKEN="insert-your-huggingface-token-here" +helm dependency update +helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi-guardrails.enabled=true --wait +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/guardrails-usvc 9090:9090` to expose the guardrails-usvc service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9090/v1/guardrails \ + -X POST \ + -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here.
The host path "modelUseHostPath" will be mounted to container as /data directory | +| image.repository | string | `"opea/guardrails-usvc"` | | +| service.port | string | `"9090"` | | +| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint | +| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using | diff --git a/helm-charts/guardrails-usvc/ci-values.yaml b/helm-charts/guardrails-usvc/ci-values.yaml new file mode 100644 index 000000000..3aef2fce5 --- /dev/null +++ b/helm-charts/guardrails-usvc/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for guardrails-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tgi-guardrails: + enabled: true diff --git a/helm-charts/guardrails-usvc/templates/_helpers.tpl b/helm-charts/guardrails-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..088f88455 --- /dev/null +++ b/helm-charts/guardrails-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "guardrails-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "guardrails-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "guardrails-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "guardrails-usvc.labels" -}} +helm.sh/chart: {{ include "guardrails-usvc.chart" . }} +{{ include "guardrails-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "guardrails-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "guardrails-usvc.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "guardrails-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "guardrails-usvc.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/guardrails-usvc/templates/configmap.yaml b/helm-charts/guardrails-usvc/templates/configmap.yaml new file mode 100644 index 000000000..86cc30efc --- /dev/null +++ b/helm-charts/guardrails-usvc/templates/configmap.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "guardrails-usvc.fullname" . }}-config + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.SAFETY_GUARD_ENDPOINT }} + SAFETY_GUARD_ENDPOINT: {{ tpl .Values.SAFETY_GUARD_ENDPOINT . 
| quote}} + {{- else }} + SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails" + {{- end }} + SAFETY_GUARD_MODEL_ID: {{ .Values.SAFETY_GUARD_MODEL_ID | quote }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + LOGFLAG: {{ .Values.LOGFLAG | quote }} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.SAFETY_GUARD_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi-guardrails,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} diff --git a/helm-charts/guardrails-usvc/templates/deployment.yaml b/helm-charts/guardrails-usvc/templates/deployment.yaml new file mode 100644 index 000000000..bebf86e59 --- /dev/null +++ b/helm-charts/guardrails-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "guardrails-usvc.fullname" . }} + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "guardrails-usvc.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: guardrails-usvc + containerPort: 9090 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "guardrails-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/guardrails-usvc/templates/service.yaml b/helm-charts/guardrails-usvc/templates/service.yaml new file mode 100644 index 000000000..594312f03 --- /dev/null +++ b/helm-charts/guardrails-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "guardrails-usvc.fullname" . }} + labels: + {{- include "guardrails-usvc.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9090 + protocol: TCP + name: guardrails-usvc + selector: + {{- include "guardrails-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml b/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..ec077d430 --- /dev/null +++ b/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "guardrails-usvc.fullname" . }}-testpod" + labels: + {{- include "guardrails-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "guardrails-usvc.fullname" . }}:{{ .Values.service.port }}/v1/guardrails -sS --fail-with-body \ + -X POST \ + -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/guardrails-usvc/values.yaml b/helm-charts/guardrails-usvc/values.yaml new file mode 100644 index 000000000..56a0cc6d4 --- /dev/null +++ b/helm-charts/guardrails-usvc/values.yaml @@ -0,0 +1,96 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for guardrails-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +tgi-guardrails: + enabled: false + LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + +replicaCount: 1 + +# TGI service endpoint +SAFETY_GUARD_ENDPOINT: "" +# Guard Model Id +SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +image: + repository: opea/guardrails-tgi + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 9090 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: guardrails-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" diff --git a/helm-charts/llm-uservice/.helmignore b/helm-charts/llm-uservice/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/llm-uservice/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/llm-uservice/Chart.yaml b/helm-charts/llm-uservice/Chart.yaml new file mode 100644 index 000000000..aefefd516 --- /dev/null +++ b/helm-charts/llm-uservice/Chart.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: llm-uservice +description: The Helm chart for deploying llm as microservice +type: application +version: 0-latest +# The llm microservice server version +appVersion: "v1.0" +dependencies: + - name: tgi + version: 0-latest + repository: file://../tgi + condition: tgi.enabled + - name: vllm + version: 0-latest + repository: file://../vllm + condition: vllm.enabled diff --git a/helm-charts/llm-uservice/README.md b/helm-charts/llm-uservice/README.md new file mode 100644 index 000000000..a1a8d6d47 --- /dev/null +++ b/helm-charts/llm-uservice/README.md @@ -0,0 +1,55 @@ +# llm-uservice + +Helm chart for deploying LLM microservice. + +llm-uservice depends on TGI, you should set TGI_LLM_ENDPOINT as tgi endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. + +After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`. + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/llm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +export TGI_LLM_ENDPOINT="http://tgi" +helm dependency update +helm install llm-uservice .
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} --wait +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/llm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +helm dependency update +helm install llm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/llm-uservice 9000:9000` to expose the llm-uservice service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ---------------- | ------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/llm-tgi"` | | +| service.port | string | `"9000"` | | +| TGI_LLM_ENDPOINT | string | `""` | LLM endpoint | +| global.monitoring | bool | `false` | Service usage metrics | diff --git a/helm-charts/llm-uservice/ci-docsum-values.yaml b/helm-charts/llm-uservice/ci-docsum-values.yaml new file mode 100644 index 000000000..b9f269c5a --- /dev/null +++ b/helm-charts/llm-uservice/ci-docsum-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/llm-docsum-tgi + tag: "latest" +tgi: + enabled: true diff --git a/helm-charts/llm-uservice/ci-faqgen-values.yaml b/helm-charts/llm-uservice/ci-faqgen-values.yaml new file
mode 100644 index 000000000..f7f3f5a55 --- /dev/null +++ b/helm-charts/llm-uservice/ci-faqgen-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/llm-faqgen-tgi + tag: "latest" + +tgi: + enabled: true + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct diff --git a/helm-charts/llm-uservice/ci-values.yaml b/helm-charts/llm-uservice/ci-values.yaml new file mode 100644 index 000000000..88eef5b4a --- /dev/null +++ b/helm-charts/llm-uservice/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for llm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tgi: + enabled: true diff --git a/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml b/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml new file mode 100644 index 000000000..2438eaed9 --- /dev/null +++ b/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for llm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+tgi: + enabled: false +vllm: + enabled: true + image: + repository: opea/vllm-gaudi + tag: "latest" + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + OMPI_MCA_btl_vader_single_copy_mechanism: none + extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"] + resources: + limits: + habana.ai/gaudi: 1 + +vLLM_ENDPOINT: "" +LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 +image: + repository: opea/llm-vllm + tag: "latest" diff --git a/helm-charts/llm-uservice/templates/_helpers.tpl b/helm-charts/llm-uservice/templates/_helpers.tpl new file mode 100644 index 000000000..d67db64c2 --- /dev/null +++ b/helm-charts/llm-uservice/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llm-uservice.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "llm-uservice.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "llm-uservice.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llm-uservice.labels" -}} +helm.sh/chart: {{ include "llm-uservice.chart" . }} +{{ include "llm-uservice.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "llm-uservice.selectorLabels" -}} +app.kubernetes.io/name: {{ include "llm-uservice.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "llm-uservice.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "llm-uservice.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/llm-uservice/templates/configmap.yaml b/helm-charts/llm-uservice/templates/configmap.yaml new file mode 100644 index 000000000..bd49777dc --- /dev/null +++ b/helm-charts/llm-uservice/templates/configmap.yaml @@ -0,0 +1,36 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "llm-uservice.fullname" . }}-config + labels: + {{- include "llm-uservice.labels" . 
| nindent 4 }} +data: + {{- if .Values.TGI_LLM_ENDPOINT }} + TGI_LLM_ENDPOINT: {{ .Values.TGI_LLM_ENDPOINT | quote}} + {{- else }} + TGI_LLM_ENDPOINT: "http://{{ .Release.Name }}-tgi" + {{- end }} + {{- if .Values.vLLM_ENDPOINT }} + vLLM_ENDPOINT: {{ .Values.vLLM_ENDPOINT | quote}} + {{- else }} + vLLM_ENDPOINT: "http://{{ .Release.Name }}-vllm" + {{- end }} + {{- if .Values.LLM_MODEL_ID }} + LLM_MODEL: {{ .Values.LLM_MODEL_ID | quote}} + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if or .Values.global.http_proxy .Values.global.https_proxy }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/llm-uservice/templates/deployment.yaml b/helm-charts/llm-uservice/templates/deployment.yaml new file mode 100644 index 000000000..dfa4e6022 --- /dev/null +++ b/helm-charts/llm-uservice/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-uservice.fullname" . }} + labels: + {{- include "llm-uservice.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "llm-uservice.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "llm-uservice.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "llm-uservice.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: llm-uservice + containerPort: 9000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "llm-uservice.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/llm-uservice/templates/service.yaml b/helm-charts/llm-uservice/templates/service.yaml new file mode 100644 index 000000000..06e13b8c6 --- /dev/null +++ b/helm-charts/llm-uservice/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llm-uservice.fullname" . }} + labels: + {{- include "llm-uservice.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9000 + protocol: TCP + name: llm-uservice + selector: + {{- include "llm-uservice.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/llm-uservice/templates/servicemonitor.yaml b/helm-charts/llm-uservice/templates/servicemonitor.yaml new file mode 100644 index 000000000..ecb83fc34 --- /dev/null +++ b/helm-charts/llm-uservice/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "llm-uservice.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "llm-uservice.selectorLabels" . | nindent 6 }} + endpoints: + - port: llm-uservice + interval: 5s +{{- end }} diff --git a/helm-charts/llm-uservice/templates/tests/test-pod.yaml b/helm-charts/llm-uservice/templates/tests/test-pod.yaml new file mode 100644 index 000000000..e5012fc75 --- /dev/null +++ b/helm-charts/llm-uservice/templates/tests/test-pod.yaml @@ -0,0 +1,44 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "llm-uservice.fullname" . }}-testpod" + labels: + {{- include "llm-uservice.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + {{- if contains "llm-docsum-tgi" .Values.image.repository }} + # Try with docsum endpoint + curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/docsum -sS --fail-with-body \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' \ + -H 'Content-Type: application/json' && break; + {{- else if contains "llm-faqgen-tgi" .Values.image.repository }} + # Try with faqgen endpoint + curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' \ + -H 'Content-Type: application/json' && break; + {{- else }} + curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' && break; + {{- end }} + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/llm-uservice/values.yaml b/helm-charts/llm-uservice/values.yaml new file mode 100644 index 000000000..8908bb74c --- /dev/null +++ b/helm-charts/llm-uservice/values.yaml @@ -0,0 +1,105 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for llm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tgi: + enabled: false +vllm: + enabled: false + +replicaCount: 1 +# For tgi +TGI_LLM_ENDPOINT: "" +# For vllm, set the LLM_MODEL_ID the same as vllm sub chart +vLLM_ENDPOINT: "" +LLM_MODEL_ID: "" + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +image: + repository: opea/llm-tgi + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for llm service is 9000 + port: 9000 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: llm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/llm-uservice/variant_docsum-values.yaml b/helm-charts/llm-uservice/variant_docsum-values.yaml new file mode 100644 index 000000000..9e1f33bde --- /dev/null +++ b/helm-charts/llm-uservice/variant_docsum-values.yaml @@ -0,0 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/llm-docsum-tgi + tag: "latest" diff --git a/helm-charts/llm-uservice/variant_faqgen-values.yaml b/helm-charts/llm-uservice/variant_faqgen-values.yaml new file mode 100644 index 000000000..4e51fdd1d --- /dev/null +++ b/helm-charts/llm-uservice/variant_faqgen-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/llm-faqgen-tgi + tag: "latest" + +tgi: + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct diff --git a/helm-charts/lvm-uservice/.helmignore b/helm-charts/lvm-uservice/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ 
b/helm-charts/lvm-uservice/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/lvm-uservice/Chart.yaml b/helm-charts/lvm-uservice/Chart.yaml new file mode 100644 index 000000000..66375e7ae --- /dev/null +++ b/helm-charts/lvm-uservice/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: lvm-uservice +description: The Helm chart for deploying lvm as microservice +type: application +version: 0-latest +# The lvm microservice server version +appVersion: "v1.0" +dependencies: + - name: tgi + version: 0-latest + repository: file://../tgi + condition: tgi.enabled diff --git a/helm-charts/lvm-uservice/README.md b/helm-charts/lvm-uservice/README.md new file mode 100644 index 000000000..d8bfcd6b0 --- /dev/null +++ b/helm-charts/lvm-uservice/README.md @@ -0,0 +1,55 @@ +# lvm-uservice + +Helm chart for deploying LVM microservice. + +lvm-uservice depends on TGI, you should set LVM_ENDPOINT as tgi endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. + +After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`. + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/lvm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +export LVM_ENDPOINT="http://tgi" +helm dependency update +helm install lvm-uservice .
--set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/lvm-uservice +export HFTOKEN="insert-your-huggingface-token-here" +helm dependency update +helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/lvm-uservice 9000:9000` to expose the lvm-uservice service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ---------------- | ------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/lvm-tgi"` | | +| service.port | string | `"9000"` | | +| LVM_ENDPOINT | string | `""` | LVM endpoint | +| global.monitoring | bool | `false` | Service usage metrics | diff --git a/helm-charts/lvm-uservice/ci-values.yaml b/helm-charts/lvm-uservice/ci-values.yaml new file mode 100644 index 000000000..a4c378251 --- /dev/null +++ b/helm-charts/lvm-uservice/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for lvm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +tgi: + enabled: true diff --git a/helm-charts/lvm-uservice/templates/_helpers.tpl b/helm-charts/lvm-uservice/templates/_helpers.tpl new file mode 100644 index 000000000..86c26b0e6 --- /dev/null +++ b/helm-charts/lvm-uservice/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "lvm-uservice.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "lvm-uservice.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "lvm-uservice.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "lvm-uservice.labels" -}} +helm.sh/chart: {{ include "lvm-uservice.chart" . }} +{{ include "lvm-uservice.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "lvm-uservice.selectorLabels" -}} +app.kubernetes.io/name: {{ include "lvm-uservice.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "lvm-uservice.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "lvm-uservice.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/lvm-uservice/templates/configmap.yaml b/helm-charts/lvm-uservice/templates/configmap.yaml new file mode 100644 index 000000000..62f75d323 --- /dev/null +++ b/helm-charts/lvm-uservice/templates/configmap.yaml @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "lvm-uservice.fullname" . }}-config + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +data: + {{- if .Values.LVM_ENDPOINT }} + LVM_ENDPOINT: {{ .Values.LVM_ENDPOINT | quote}} + {{- else }} + LVM_ENDPOINT: "http://{{ .Release.Name }}-tgi" + {{- end }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.LVM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/lvm-uservice/templates/deployment.yaml b/helm-charts/lvm-uservice/templates/deployment.yaml new file mode 100644 index 000000000..c276087d6 --- /dev/null +++ b/helm-charts/lvm-uservice/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 
+ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "lvm-uservice.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "lvm-uservice.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: lvm-uservice + containerPort: 9399 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/lvm-uservice/templates/service.yaml b/helm-charts/lvm-uservice/templates/service.yaml new file mode 100644 index 000000000..37e454dec --- /dev/null +++ b/helm-charts/lvm-uservice/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9399 + protocol: TCP + name: lvm-uservice + selector: + {{- include "lvm-uservice.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/lvm-uservice/templates/servicemonitor.yaml b/helm-charts/lvm-uservice/templates/servicemonitor.yaml new file mode 100644 index 000000000..9fe58419b --- /dev/null +++ b/helm-charts/lvm-uservice/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "lvm-uservice.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "lvm-uservice.selectorLabels" . 
| nindent 6 }} + endpoints: + - port: lvm-uservice + interval: 5s +{{- end }} diff --git a/helm-charts/lvm-uservice/templates/tests/test-pod.yaml b/helm-charts/lvm-uservice/templates/tests/test-pod.yaml new file mode 100644 index 000000000..7782cdb1e --- /dev/null +++ b/helm-charts/lvm-uservice/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "lvm-uservice.fullname" . }}-testpod" + labels: + {{- include "lvm-uservice.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "lvm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/lvm -sS --fail-with-body \ + -X POST \ + -d '{"image":"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC","prompt":"What is this?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/lvm-uservice/values.yaml b/helm-charts/lvm-uservice/values.yaml new file mode 100644 index 000000000..75188a7e0 --- /dev/null +++ b/helm-charts/lvm-uservice/values.yaml @@ -0,0 +1,102 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for lvm-uservice. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +tgi: + enabled: false + LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf + MAX_INPUT_LENGTH: "4096" + MAX_TOTAL_TOKENS: "8192" + +replicaCount: 1 +LVM_ENDPOINT: "" + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +image: + repository: opea/lvm-tgi + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for lvm service is 9399 + port: 9399 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: lvm-uservice + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/mongodb/.helmignore b/helm-charts/mongodb/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/mongodb/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/mongodb/Chart.yaml b/helm-charts/mongodb/Chart.yaml new file mode 100644 index 000000000..a93f27403 --- /dev/null +++ b/helm-charts/mongodb/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: mongodb +description: The Helm chart for MongoDB +type: application +version: 0-latest +appVersion: "7.0.11" diff --git a/helm-charts/mongodb/README.md b/helm-charts/mongodb/README.md new file mode 100644 index 000000000..eebb49c04 --- /dev/null +++ b/helm-charts/mongodb/README.md @@ -0,0 +1,28 @@ +# mongodb + +Helm chart for deploying MongoDB service. + +## Install the Chart + +To install the chart, run the following: + +```console +cd ${GenAIInfra_repo}/helm-charts/common +helm install mongodb mongodb +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all the mongo pods are running. + +Then run the command `kubectl port-forward svc/mongodb 27017:27017` to expose the mongodb service for access. + +Open another terminal and run the command `mongo --eval 'db.runCommand("ping").ok' localhost:27017/test --quiet ` to test mongodb access. The `mongo` command should return `1`. 
+ +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ---------- | ------------------------ | +| image.repository | string | `"mongo"` | | +| image.tag | string | `"7.0.11"` | | +| service.port | string | `"27017"` | The mongodb service port | diff --git a/helm-charts/mongodb/ci-values.yaml b/helm-charts/mongodb/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/mongodb/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/mongodb/templates/_helpers.tpl b/helm-charts/mongodb/templates/_helpers.tpl new file mode 100644 index 000000000..6b1b8c179 --- /dev/null +++ b/helm-charts/mongodb/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "mongodb.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "mongodb.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "mongodb.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "mongodb.labels" -}} +helm.sh/chart: {{ include "mongodb.chart" . }} +{{ include "mongodb.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "mongodb.selectorLabels" -}} +app.kubernetes.io/name: {{ include "mongodb.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "mongodb.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "mongodb.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/mongodb/templates/deployment.yaml b/helm-charts/mongodb/templates/deployment.yaml new file mode 100644 index 000000000..4fca11ffd --- /dev/null +++ b/helm-charts/mongodb/templates/deployment.yaml @@ -0,0 +1,78 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "mongodb.fullname" . }} + labels: + {{- include "mongodb.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "mongodb.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mongodb.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /tmp + name: tmp + ports: + - name: port + containerPort: 27017 + protocol: TCP + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: data-volume + emptyDir: {} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "mongodb.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/mongodb/templates/service.yaml b/helm-charts/mongodb/templates/service.yaml new file mode 100644 index 000000000..979628dbd --- /dev/null +++ b/helm-charts/mongodb/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mongodb.fullname" . }} + labels: + {{- include "mongodb.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 27017 + protocol: TCP + name: mongodb + selector: + {{- include "mongodb.selectorLabels" . 
| nindent 4 }} diff --git a/helm-charts/mongodb/templates/tests/test-pod.yaml b/helm-charts/mongodb/templates/tests/test-pod.yaml new file mode 100644 index 000000000..8236679ba --- /dev/null +++ b/helm-charts/mongodb/templates/tests/test-pod.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "mongodb.fullname" . }}-testpod + labels: + {{- include "mongodb.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: mongoclient + image: mongoclient/mongoclient:latest + command: ['sh', '-c'] + args: + - | + set -x + mongo --eval 'db.runCommand("ping").ok' {{ include "mongodb.fullname" . }}:{{ .Values.service.port }}/test --quiet + restartPolicy: Never diff --git a/helm-charts/mongodb/values.yaml b/helm-charts/mongodb/values.yaml new file mode 100644 index 000000000..cb38a714d --- /dev/null +++ b/helm-charts/mongodb/values.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 +image: + repository: mongo + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "7.0.11" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: false + runAsUser: 999 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 27017 + +startupProbe: + tcpSocket: + port: port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/helm-charts/prompt-usvc/.helmignore b/helm-charts/prompt-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/prompt-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/prompt-usvc/Chart.yaml b/helm-charts/prompt-usvc/Chart.yaml new file mode 100644 index 000000000..a564d48c5 --- /dev/null +++ b/helm-charts/prompt-usvc/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: prompt-usvc +description: The Helm chart for deploying prompt as microservice +type: application +version: 0-latest +# The prompt microservice server version +appVersion: "v1.0" +dependencies: + - name: mongodb + version: 0-latest + repository: file://../mongodb + condition: mongodb.enabled diff --git a/helm-charts/prompt-usvc/README.md b/helm-charts/prompt-usvc/README.md new file mode 100644 index 000000000..38b240ed1 --- /dev/null +++ b/helm-charts/prompt-usvc/README.md @@ -0,0 +1,54 @@ +# prompt-usvc + +Helm chart for deploying prompt-usvc microservice. + +prompt-usvc uses a MongoDB database service; please specify its endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the mongodb chart, please refer to the [mongodb](../mongodb) for more information. + +After you've deployed the mongodb chart successfully, run `kubectl get svc` to get the service host and port, e.g. `mongodb:27017`. + +To install prompt-usvc chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/prompt-usvc +export MONGO_HOST="mongodb" +export MONGO_PORT="27017" +helm dependency update +helm install prompt-usvc . --set MONGO_HOST=${MONGO_HOST} --set MONGO_PORT=${MONGO_PORT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/prompt-usvc +helm dependency update +helm install prompt-usvc . 
--set mongodb.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/prompt-usvc 6018:6018` to expose the prompt-usvc service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl -X 'POST' \ + http://localhost:6018/v1/prompt/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{"prompt_text": "test prompt", "user": "test"}'; +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ------------------------------------ | ----------- | +| image.repository | string | `"opea/promptregistry-mongo-server"` | | +| service.port | string | `"6018"` | | +| MONGO_HOST | string | `""` | MongoDB host; when empty, `<release-name>-mongodb` is used | +| MONGO_PORT | string | `"27017"` | MongoDB port | diff --git a/helm-charts/prompt-usvc/ci-values.yaml b/helm-charts/prompt-usvc/ci-values.yaml new file mode 100644 index 000000000..1e0d5c386 --- /dev/null +++ b/helm-charts/prompt-usvc/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for prompt-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +mongodb: + enabled: true diff --git a/helm-charts/prompt-usvc/templates/_helpers.tpl b/helm-charts/prompt-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..147787115 --- /dev/null +++ b/helm-charts/prompt-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "prompt-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "prompt-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prompt-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prompt-usvc.labels" -}} +helm.sh/chart: {{ include "prompt-usvc.chart" . }} +{{ include "prompt-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prompt-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prompt-usvc.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "prompt-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prompt-usvc.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/prompt-usvc/templates/configmap.yaml b/helm-charts/prompt-usvc/templates/configmap.yaml new file mode 100644 index 000000000..e78bba220 --- /dev/null +++ b/helm-charts/prompt-usvc/templates/configmap.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "prompt-usvc.fullname" . }}-config + labels: + {{- include "prompt-usvc.labels" . 
| nindent 4 }} +data: + {{- if .Values.MONGO_HOST }} + MONGO_HOST: {{ .Values.MONGO_HOST | quote}} + {{- else }} + MONGO_HOST: "{{ .Release.Name }}-mongodb" + {{- end }} + {{- if .Values.MONGO_PORT }} + MONGO_PORT: {{ .Values.MONGO_PORT | quote }} + {{- else }} + MONGO_PORT: "27017" + {{- end }} + DB_NAME: {{ .Values.DB_NAME | quote }} + COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.MONGO_HOST) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-mongodb,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/prompt-usvc/templates/deployment.yaml b/helm-charts/prompt-usvc/templates/deployment.yaml new file mode 100644 index 000000000..836668384 --- /dev/null +++ b/helm-charts/prompt-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "prompt-usvc.fullname" . }} + labels: + {{- include "prompt-usvc.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "prompt-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "prompt-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "prompt-usvc.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: port + containerPort: {{ .Values.port }} + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "prompt-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/prompt-usvc/templates/service.yaml b/helm-charts/prompt-usvc/templates/service.yaml new file mode 100644 index 000000000..f22105fd8 --- /dev/null +++ b/helm-charts/prompt-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "prompt-usvc.fullname" . }} + labels: + {{- include "prompt-usvc.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.port }} + protocol: TCP + name: port + selector: + {{- include "prompt-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/prompt-usvc/templates/tests/test-pod.yaml b/helm-charts/prompt-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..70d1ea3ed --- /dev/null +++ b/helm-charts/prompt-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "prompt-usvc.fullname" . }}-testpod + labels: + {{- include "prompt-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + set -x + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -X 'POST' \ + http://{{ include "prompt-usvc.fullname" . }}:{{ .Values.service.port }}/v1/prompt/create -sS --fail-with-body \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{"prompt_text": "test prompt", "user": "test"}' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/prompt-usvc/values.yaml b/helm-charts/prompt-usvc/values.yaml new file mode 100644 index 000000000..a475e6f5e --- /dev/null +++ b/helm-charts/prompt-usvc/values.yaml @@ -0,0 +1,99 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for prompt-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +mongodb: + enabled: false + +replicaCount: 1 + +image: + repository: opea/promptregistry-mongo-server + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + port: 6018 + +# Change the port to 6012 if you are using 1.0 docker images +# https://github.com/opea-project/GenAIComps/pull/740 +port: 6018 +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +# mongo DB service URL, e.g. 
mongo://<host>:<port> +MONGO_HOST: "" +MONGO_PORT: 27017 +DB_NAME: "OPEA" +COLLECTION_NAME: "Prompt" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/redis-vector-db/.helmignore b/helm-charts/redis-vector-db/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/redis-vector-db/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/redis-vector-db/Chart.yaml b/helm-charts/redis-vector-db/Chart.yaml new file mode 100644 index 000000000..6e43a25d0 --- /dev/null +++ b/helm-charts/redis-vector-db/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: redis-vector-db +description: The Helm chart for Redis Vector DB +type: application +version: 0-latest +appVersion: "7.2.0-v9" diff --git a/helm-charts/redis-vector-db/README.md b/helm-charts/redis-vector-db/README.md new file mode 100644 index 000000000..108367c92 --- /dev/null +++ b/helm-charts/redis-vector-db/README.md @@ -0,0 +1,29 @@ +# redis-vector-db + +Helm chart for deploying Redis Vector DB service. + +## Install the Chart + +To install the chart, run the following: + +```console +cd ${GenAIInfra_repo}/helm-charts/common +helm install redis-vector-db redis-vector-db +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all the redis pods are running. + +Then run the command `kubectl port-forward svc/redis-vector-db 6379:6379` to expose the redis vector db service for access. 
+ +Open another terminal and run the command `redis-cli -h 127.0.0.1 -p 6379 ping` to access the redis vector db. The `redis-cli` command should return `PONG`. + +## Values + +| Key | Type | Default | Description | +| ---------------------------- | ------ | --------------------- | ---------------------- | +| image.repository | string | `"redis/redis-stack"` | | +| image.tag | string | `"7.2.0-v9"` | | +| service.port (redis-service) | string | `"6379"` | The redis-service port | +| service.port (redis-insight) | string | `"8001"` | The redis-insight port | diff --git a/helm-charts/redis-vector-db/ci-values.yaml b/helm-charts/redis-vector-db/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/redis-vector-db/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/redis-vector-db/templates/_helpers.tpl b/helm-charts/redis-vector-db/templates/_helpers.tpl new file mode 100644 index 000000000..f8f7a65da --- /dev/null +++ b/helm-charts/redis-vector-db/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "redis-vector-db.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "redis-vector-db.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "redis-vector-db.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "redis-vector-db.labels" -}} +helm.sh/chart: {{ include "redis-vector-db.chart" . }} +{{ include "redis-vector-db.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "redis-vector-db.selectorLabels" -}} +app.kubernetes.io/name: {{ include "redis-vector-db.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "redis-vector-db.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "redis-vector-db.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/redis-vector-db/templates/deployment.yaml b/helm-charts/redis-vector-db/templates/deployment.yaml new file mode 100644 index 000000000..d4fc692b6 --- /dev/null +++ b/helm-charts/redis-vector-db/templates/deployment.yaml @@ -0,0 +1,87 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "redis-vector-db.fullname" . }} + labels: + {{- include "redis-vector-db.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "redis-vector-db.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "redis-vector-db.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + volumeMounts: + - mountPath: /data + name: data-volume + - mountPath: /redisinsight + name: redisinsight-volume + - mountPath: /tmp + name: tmp + ports: + {{- $redisServicePort := index .Values.service.ports 0 }} + {{- range .Values.service.ports }} + - name: {{ .name }} + containerPort: {{ .targetPort }} + protocol: TCP + {{- end }} + startupProbe: + tcpSocket: + port: {{ $redisServicePort.targetPort }} # Probe the Redis port + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: data-volume + emptyDir: {} + - name: redisinsight-volume + emptyDir: {} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "redis-vector-db.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/redis-vector-db/templates/service.yaml b/helm-charts/redis-vector-db/templates/service.yaml new file mode 100644 index 000000000..d656f04ce --- /dev/null +++ b/helm-charts/redis-vector-db/templates/service.yaml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "redis-vector-db.fullname" . }} + labels: + {{- include "redis-vector-db.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + {{- range .Values.service.ports }} + - port: {{ .port }} + targetPort: {{ .targetPort }} + protocol: {{ .protocol }} + name: {{ .name }} + {{- end }} + selector: + {{- include "redis-vector-db.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/redis-vector-db/templates/tests/test-pod.yaml b/helm-charts/redis-vector-db/templates/tests/test-pod.yaml new file mode 100644 index 000000000..d694c1f63 --- /dev/null +++ b/helm-charts/redis-vector-db/templates/tests/test-pod.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "redis-vector-db.fullname" . }}-testpod + labels: + {{- include "redis-vector-db.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: redis:7.2-alpine + command: ['sh', '-c'] + args: + - | + redis-cli -h {{ include "redis-vector-db.fullname" . 
}} \ + {{- with (first .Values.service.ports) }} + -p {{ .port }} \ + {{- end }} + ping + restartPolicy: Never diff --git a/helm-charts/redis-vector-db/values.yaml b/helm-charts/redis-vector-db/values.yaml new file mode 100644 index 000000000..d07339c3d --- /dev/null +++ b/helm-charts/redis-vector-db/values.yaml @@ -0,0 +1,63 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 +image: + repository: redis/redis-stack + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "7.2.0-v9" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + ports: + - name: redis-service + port: 6379 + targetPort: 6379 + protocol: TCP + - name: redis-insight + port: 8001 + targetPort: 8001 + protocol: TCP +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} diff --git a/helm-charts/reranking-usvc/.helmignore b/helm-charts/reranking-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/reranking-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/reranking-usvc/Chart.yaml b/helm-charts/reranking-usvc/Chart.yaml new file mode 100644 index 000000000..a7f722882 --- /dev/null +++ b/helm-charts/reranking-usvc/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: reranking-usvc +description: The Helm chart for deploying reranking as microservice +type: application +version: 0-latest +# The reranking microservice server version +appVersion: "v1.0" +dependencies: + - name: teirerank + version: 0-latest + repository: file://../teirerank + condition: teirerank.enabled diff --git a/helm-charts/reranking-usvc/README.md b/helm-charts/reranking-usvc/README.md new file mode 100644 index 000000000..bf77f7f3c --- /dev/null +++ b/helm-charts/reranking-usvc/README.md @@ -0,0 +1,52 @@ +# reranking-usvc + +Helm chart for deploying reranking microservice. + +reranking-usvc depends on teirerank, set the TEI_RERANKING_ENDPOINT as teirerank endpoint. + +## (Option1): Installing the chart separately + +First, you need to install the teirerank chart, please refer to the [teirerank](../teirerank) chart for more information. 
+ +After you've deployed the teirerank chart successfully, please run `kubectl get svc` to get the tei service endpoint, i.e. `http://teirerank`. + +To install the reranking-usvc chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/reranking-usvc +export TEI_RERANKING_ENDPOINT="http://teirerank" +helm dependency update +helm install reranking-usvc . --set TEI_RERANKING_ENDPOINT=${TEI_RERANKING_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/reranking-usvc +helm dependency update +helm install reranking-usvc . --set teirerank.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/reranking-usvc 8000:8000` to expose the reranking-usvc service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:8000/v1/reranking \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------------- | ------ | ---------------------- | ----------- | +| image.repository | string | `"opea/reranking-tei"` | | +| TEI_RERANKING_ENDPOINT | string | `""` | | +| service.port | string | `"8000"` | | +| global.monitoring | bool | `false` | | diff --git a/helm-charts/reranking-usvc/ci-values.yaml b/helm-charts/reranking-usvc/ci-values.yaml new file mode 100644 index 000000000..1118483f4 --- /dev/null +++ b/helm-charts/reranking-usvc/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for reranking-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +teirerank: + enabled: true diff --git a/helm-charts/reranking-usvc/templates/_helpers.tpl b/helm-charts/reranking-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..9247fe13b --- /dev/null +++ b/helm-charts/reranking-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "reranking-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "reranking-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "reranking-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "reranking-usvc.labels" -}} +helm.sh/chart: {{ include "reranking-usvc.chart" . }} +{{ include "reranking-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "reranking-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "reranking-usvc.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use: serviceAccount.name when set, else the chart fullname if serviceAccount.create is true, else "default". +*/}} +{{- define "reranking-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "reranking-usvc.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/reranking-usvc/templates/configmap.yaml b/helm-charts/reranking-usvc/templates/configmap.yaml new file mode 100644 index 000000000..69b1fcd91 --- /dev/null +++ b/helm-charts/reranking-usvc/templates/configmap.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "reranking-usvc.fullname" . }}-config + labels: + {{- include "reranking-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.TEI_RERANKING_ENDPOINT }} + TEI_RERANKING_ENDPOINT: {{ .Values.TEI_RERANKING_ENDPOINT | quote }} + {{- else }} + TEI_RERANKING_ENDPOINT: "http://{{ .Release.Name }}-teirerank" + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.TEI_RERANKING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-teirerank,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/reranking-usvc/templates/deployment.yaml b/helm-charts/reranking-usvc/templates/deployment.yaml new file mode 100644 index 000000000..50abfc5a0 --- /dev/null +++ b/helm-charts/reranking-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "reranking-usvc.fullname" .
}} + labels: + {{- include "reranking-usvc.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "reranking-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "reranking-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "reranking-usvc.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: reranking-usvc + containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "reranking-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/reranking-usvc/templates/service.yaml b/helm-charts/reranking-usvc/templates/service.yaml new file mode 100644 index 000000000..2b1bb7afa --- /dev/null +++ b/helm-charts/reranking-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "reranking-usvc.fullname" . }} + labels: + {{- include "reranking-usvc.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 8000 + protocol: TCP + name: reranking-usvc + selector: + {{- include "reranking-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/reranking-usvc/templates/servicemonitor.yaml b/helm-charts/reranking-usvc/templates/servicemonitor.yaml new file mode 100644 index 000000000..8d1306edf --- /dev/null +++ b/helm-charts/reranking-usvc/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "reranking-usvc.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "reranking-usvc.selectorLabels" . 
| nindent 6 }} + endpoints: + - port: reranking-usvc + interval: 5s +{{- end }} diff --git a/helm-charts/reranking-usvc/templates/tests/test-pod.yaml b/helm-charts/reranking-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..4bd7ac745 --- /dev/null +++ b/helm-charts/reranking-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "reranking-usvc.fullname" . }}-testpod" + labels: + {{- include "reranking-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "reranking-usvc.fullname" . }}:{{ .Values.service.port }}/v1/reranking -sS --fail-with-body \ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/reranking-usvc/values.yaml b/helm-charts/reranking-usvc/values.yaml new file mode 100644 index 000000000..924815ff7 --- /dev/null +++ b/helm-charts/reranking-usvc/values.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for reranking-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +teirerank: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +TEI_RERANKING_ENDPOINT: "" +image: + repository: opea/reranking-tei + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for reranking service is 8000 + port: 8000 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: reranking-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/retriever-usvc/.helmignore b/helm-charts/retriever-usvc/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/retriever-usvc/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/retriever-usvc/Chart.yaml b/helm-charts/retriever-usvc/Chart.yaml new file mode 100644 index 000000000..0bb54c3c0 --- /dev/null +++ b/helm-charts/retriever-usvc/Chart.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: retriever-usvc +description: The Helm chart for deploying retriever as microservice +type: application +version: 0-latest +# The retriever microservice server version +appVersion: "v1.0" +dependencies: + - name: tei + version: 0-latest + repository: file://../tei + condition: tei.enabled + - name: redis-vector-db + version: 0-latest + repository: file://../redis-vector-db + condition: redis-vector-db.enabled + - name: milvus + version: 4.2.12 + repository: https://zilliztech.github.io/milvus-helm/ + condition: milvus.enabled diff --git a/helm-charts/retriever-usvc/README.md b/helm-charts/retriever-usvc/README.md new file mode 100644 index 000000000..1eba680c3 --- /dev/null +++ b/helm-charts/retriever-usvc/README.md @@ -0,0 +1,59 @@ +# retriever-usvc + +Helm chart for deploying Retriever microservice. + +retriever-usvc depends on redis and tei, you should set these endpoints before start. + +## (Option1): Installing the chart separately + +First, you need to install the tei and redis-vector-db chart, refer to the [tei](../tei/README.md) and [redis-vector-db](../redis-vector-db/README.md) for more information. + +After you've deployed the tei and redis-vector-db chart successfully, run `kubectl get svc` to get the service endpoint and URL respectively, i.e. `http://tei`, `redis://redis-vector-db:6379`. 
+ +To install retriever-usvc chart, run the following: + +```console +cd GenAIInfra/helm-charts/common/retriever-usvc +export REDIS_URL="redis://redis-vector-db:6379" +export TEI_EMBEDDING_ENDPOINT="http://tei" +helm dependency update +helm install retriever-usvc . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/common/retriever-usvc +helm dependency update +helm install retriever-usvc . --set tei.enabled=true --set redis-vector-db.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/retriever-usvc 7000:7000` to expose the retriever-usvc service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------------- | ------ | ---------------------- | ----------- | +| image.repository | string | `"opea/retriever-tgi"` | | +| service.port | string | `"7000"` | | +| REDIS_URL | string | `""` | | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | +| global.monitoring | bool | `false` | | + +## Milvus support + +Refer to the milvus-values.yaml for milvus configurations.
diff --git a/helm-charts/retriever-usvc/ci-values.yaml b/helm-charts/retriever-usvc/ci-values.yaml new file mode 100644 index 000000000..cbc29c7ee --- /dev/null +++ b/helm-charts/retriever-usvc/ci-values.yaml @@ -0,0 +1,13 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for retriever-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: true +redis-vector-db: + enabled: true +milvus: + enabled: false diff --git a/helm-charts/retriever-usvc/milvus-values.yaml b/helm-charts/retriever-usvc/milvus-values.yaml new file mode 100644 index 000000000..c186b4be2 --- /dev/null +++ b/helm-charts/retriever-usvc/milvus-values.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for retriever-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +milvus: + enabled: true + cluster: + enabled: false + etcd: + replicaCount: 1 + pulsar: + enabled: false + minio: + mode: standalone +redis-vector-db: + enabled: false +tei: + enabled: true + +image: + repository: opea/retriever-milvus +port: 7000 +# text embedding inference service URL, e.g. http://: +#TEI_EMBEDDING_ENDPOINT: "http://dataprep-tei:80" +# milvus DB configurations +#MILVUS_HOST: "dataprep-milvus" +MILVUS_PORT: "19530" +COLLECTION_NAME: "rag_milvus" +MOSEC_EMBEDDING_ENDPOINT: "" +MOSEC_EMBEDDING_MODEL: "" diff --git a/helm-charts/retriever-usvc/templates/_helpers.tpl b/helm-charts/retriever-usvc/templates/_helpers.tpl new file mode 100644 index 000000000..18c740303 --- /dev/null +++ b/helm-charts/retriever-usvc/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "retriever-usvc.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "retriever-usvc.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "retriever-usvc.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "retriever-usvc.labels" -}} +helm.sh/chart: {{ include "retriever-usvc.chart" . }} +{{ include "retriever-usvc.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "retriever-usvc.selectorLabels" -}} +app.kubernetes.io/name: {{ include "retriever-usvc.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "retriever-usvc.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "retriever-usvc.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/retriever-usvc/templates/configmap.yaml b/helm-charts/retriever-usvc/templates/configmap.yaml new file mode 100644 index 000000000..6ada51029 --- /dev/null +++ b/helm-charts/retriever-usvc/templates/configmap.yaml @@ -0,0 +1,49 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "retriever-usvc.fullname" . }}-config + labels: + {{- include "retriever-usvc.labels" . | nindent 4 }} +data: + {{- if .Values.MOSEC_EMBEDDING_ENDPOINT }} + MOSEC_EMBEDDING_ENDPOINT: {{ .Values.MOSEC_EMBEDDING_ENDPOINT | quote}} + MOSEC_EMBEDDING_MODEL: {{ .Values.MOSEC_EMBEDDING_MODEL | quote}} + {{- else if .Values.TEI_EMBEDDING_ENDPOINT }} + TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote }} + {{- else if not .Values.LOCAL_EMBEDDING_MODEL }} + TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei" + {{- end }} + {{- if .Values.LOCAL_EMBEDDING_MODEL }} + EMBED_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }} + LOCAL_EMBEDDING_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }} + {{- end }} + {{- if .Values.REDIS_URL }} + REDIS_URL: {{ .Values.REDIS_URL | quote}} + {{- else }} + REDIS_URL: "redis://{{ .Release.Name }}-redis-vector-db:6379" + {{- end }} + INDEX_NAME: {{ .Values.INDEX_NAME | quote }} + {{- if .Values.MILVUS_HOST }} + MILVUS_HOST: {{ .Values.MILVUS_HOST | quote }} + {{- else }} + MILVUS_HOST: "{{ .Release.Name }}-milvus" + {{- end }} + MILVUS: {{ .Values.MILVUS_HOST | quote }} + MILVUS_PORT: {{ .Values.MILVUS_PORT | quote }} + {{- if .Values.COLLECTION_NAME }} + COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }} + {{- end }} + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.REDIS_URL) (and 
(not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy)) }} + no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + HF_HOME: "/tmp/.cache/huggingface" + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/retriever-usvc/templates/deployment.yaml b/helm-charts/retriever-usvc/templates/deployment.yaml new file mode 100644 index 000000000..b491b5d4d --- /dev/null +++ b/helm-charts/retriever-usvc/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "retriever-usvc.fullname" . }} + labels: + {{- include "retriever-usvc.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "retriever-usvc.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "retriever-usvc.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "retriever-usvc.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: retriever-usvc + containerPort: {{ .Values.port }} + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "retriever-usvc.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/retriever-usvc/templates/service.yaml b/helm-charts/retriever-usvc/templates/service.yaml new file mode 100644 index 000000000..e127bf858 --- /dev/null +++ b/helm-charts/retriever-usvc/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "retriever-usvc.fullname" . }} + labels: + {{- include "retriever-usvc.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.port }} + protocol: TCP + name: retriever-usvc + selector: + {{- include "retriever-usvc.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/retriever-usvc/templates/servicemonitor.yaml b/helm-charts/retriever-usvc/templates/servicemonitor.yaml new file mode 100644 index 000000000..2cfede645 --- /dev/null +++ b/helm-charts/retriever-usvc/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "retriever-usvc.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "retriever-usvc.selectorLabels" . | nindent 6 }} + endpoints: + - port: retriever-usvc + interval: 5s +{{- end }} diff --git a/helm-charts/retriever-usvc/templates/tests/test-pod.yaml b/helm-charts/retriever-usvc/templates/tests/test-pod.yaml new file mode 100644 index 000000000..3de01889d --- /dev/null +++ b/helm-charts/retriever-usvc/templates/tests/test-pod.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "retriever-usvc.fullname" . }}-testpod" + labels: + {{- include "retriever-usvc.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)"); + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "retriever-usvc.fullname" . 
}}:{{ .Values.service.port }}/v1/retrieval -sS --fail-with-body \ + -X POST \ + -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/retriever-usvc/values.yaml b/helm-charts/retriever-usvc/values.yaml new file mode 100644 index 000000000..568f2c3f0 --- /dev/null +++ b/helm-charts/retriever-usvc/values.yaml @@ -0,0 +1,116 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for retriever-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: false +milvus: + enabled: false +redis-vector-db: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +TEI_EMBEDDING_ENDPOINT: "" +LOCAL_EMBEDDING_MODEL: "" + +REDIS_URL: "" +INDEX_NAME: "rag-redis" + +image: + repository: opea/retriever-redis + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +port: 7000 +service: + type: ClusterIP + # The default port for retriever service is 7000 + port: 7000 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: retriever-usvc + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# milvus DB configurations +MILVUS_HOST: "" +MILVUS_PORT: "" +COLLECTION_NAME: "" +MOSEC_EMBEDDING_ENDPOINT: "" +MOSEC_EMBEDDING_MODEL: "" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/speecht5/.helmignore b/helm-charts/speecht5/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/speecht5/.helmignore 
@@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/speecht5/Chart.yaml b/helm-charts/speecht5/Chart.yaml new file mode 100644 index 000000000..e4e1e3ff8 --- /dev/null +++ b/helm-charts/speecht5/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: speecht5 +description: The Helm chart for deploying speecht5 as microservice +type: application +version: 0-latest +# The speecht5 microservice server version +appVersion: "v1.0" diff --git a/helm-charts/speecht5/README.md b/helm-charts/speecht5/README.md new file mode 100644 index 000000000..0a8b71165 --- /dev/null +++ b/helm-charts/speecht5/README.md @@ -0,0 +1,31 @@ +# speecht5 + +Helm chart for deploying speecht5 service. + +## Installing the Chart + +To install the chart, run the following: + +```console +export MODELDIR=/mnt/opea-models +helm install speecht5 speecht5 --set global.modelUseHostPath=${MODELDIR} +``` + +## Verify + +Use port-forward to access it from localhost. 
+ +```console +kubectl port-forward service/speecht5 1234:7055 & +curl http://localhost:1234/v1/tts \ + -XPOST \ + -d '{"text": "Who are you?"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ----------------- | ----------- | +| image.repository | string | `"opea/speecht5"` | | +| service.port | string | `"7055"` | | diff --git a/helm-charts/speecht5/ci-gaudi-values.yaml b/helm-charts/speecht5/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/speecht5/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/speecht5/ci-values.yaml b/helm-charts/speecht5/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/speecht5/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/speecht5/gaudi-values.yaml b/helm-charts/speecht5/gaudi-values.yaml new file mode 100644 index 000000000..aefd9f373 --- /dev/null +++ b/helm-charts/speecht5/gaudi-values.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for speecht5. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + repository: opea/speecht5-gaudi + tag: "latest" + +resources: + limits: + habana.ai/gaudi: 1 diff --git a/helm-charts/speecht5/templates/_helpers.tpl b/helm-charts/speecht5/templates/_helpers.tpl new file mode 100644 index 000000000..7db4f3fd2 --- /dev/null +++ b/helm-charts/speecht5/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "speecht5.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "speecht5.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "speecht5.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "speecht5.labels" -}} +helm.sh/chart: {{ include "speecht5.chart" . }} +{{ include "speecht5.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "speecht5.selectorLabels" -}} +app.kubernetes.io/name: {{ include "speecht5.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "speecht5.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "speecht5.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/speecht5/templates/configmap.yaml b/helm-charts/speecht5/templates/configmap.yaml new file mode 100644 index 000000000..d488542fb --- /dev/null +++ b/helm-charts/speecht5/templates/configmap.yaml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "speecht5.fullname" . }}-config + labels: + {{- include "speecht5.labels" . 
| nindent 4 }} +data: + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + # TTS_MODEL_PATH: {{ .Values.TTS_MODEL_PATH | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + HUGGINGFACE_HUB_CACHE: "/data" diff --git a/helm-charts/speecht5/templates/deployment.yaml b/helm-charts/speecht5/templates/deployment.yaml new file mode 100644 index 000000000..6b5976b84 --- /dev/null +++ b/helm-charts/speecht5/templates/deployment.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "speecht5.fullname" . }} + labels: + {{- include "speecht5.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "speecht5.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "speecht5.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "speecht5.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: speecht5 + containerPort: 7055 + protocol: TCP + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "speecht5.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/speecht5/templates/service.yaml b/helm-charts/speecht5/templates/service.yaml new file mode 100644 index 000000000..393766c47 --- /dev/null +++ b/helm-charts/speecht5/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "speecht5.fullname" . }} + labels: + {{- include "speecht5.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 7055 + protocol: TCP + name: speecht5 + selector: + {{- include "speecht5.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/speecht5/templates/tests/test-pod.yaml b/helm-charts/speecht5/templates/tests/test-pod.yaml new file mode 100644 index 000000000..fd86ea9ae --- /dev/null +++ b/helm-charts/speecht5/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "speecht5.fullname" . }}-testpod" + labels: + {{- include "speecht5.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "speecht5.fullname" . }}:{{ .Values.service.port }}/v1/tts \ + -X POST \ + -d '{"text": "Who are you?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/speecht5/values.yaml b/helm-charts/speecht5/values.yaml new file mode 100644 index 000000000..96a8e452a --- /dev/null +++ b/helm-charts/speecht5/values.yaml @@ -0,0 +1,97 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for speecht5. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +TTS_MODEL_PATH: "microsoft/speecht5_tts" +# VOCODE_MODEL: "microsoft/speecht5_hifigan" + +image: + repository: opea/speecht5 + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for speecht5 service is 7055 + port: 7055 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: /v1/health + port: speecht5 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /v1/health + port: speecht5 + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: /v1/health + port: speecht5 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following vars; the behavior is undefined if both are set. + # By default, both vars are set to empty, and the model will be downloaded and saved to a tmp volume. + modelUseHostPath: "" + modelUsePVC: "" diff --git a/helm-charts/tei/.helmignore b/helm-charts/tei/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/tei/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/tei/Chart.yaml b/helm-charts/tei/Chart.yaml new file mode 100644 index 000000000..42e81c036 --- /dev/null +++ b/helm-charts/tei/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: tei +description: The Helm chart for HuggingFace Text Embedding Inference Server +type: application +version: 0-latest +# The HF TEI version +appVersion: "cpu-1.5" diff --git a/helm-charts/tei/README.md b/helm-charts/tei/README.md new file mode 100644 index 000000000..b21c2eb25 --- /dev/null +++ b/helm-charts/tei/README.md @@ -0,0 +1,45 @@ +# tei + +Helm chart for deploying Hugging Face Text Embeddings Inference service. + +## Installing the Chart + +To install the chart, run the following: + +```console +cd ${GenAIInfro_repo}/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME="BAAI/bge-base-en-v1.5" +helm install tei tei --set global.modelUseHostPath=${MODELDIR} --set EMBEDDING_MODEL_ID=${MODELNAME} +``` + +By default, the tei service will download the "BAAI/bge-base-en-v1.5" model, which is about 1.1GB. + +If you have already cached the model locally, you can pass it to the container as in this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/BAAI/bge-base-en-v1.5" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/tei 2081:80` to expose the tei service for access. 
+ +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EMBEDDING_MODEL_ID | string | `"BAAI/bge-base-en-v1.5"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | | +| image.tag | string | `"cpu-1.5"` | | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/tei/ci-gaudi-values.yaml b/helm-charts/tei/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/tei/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/tei/ci-values.yaml b/helm-charts/tei/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/tei/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/tei/gaudi-values.yaml b/helm-charts/tei/gaudi-values.yaml new file mode 100644 index 000000000..45627caf3 --- /dev/null +++ b/helm-charts/tei/gaudi-values.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tei. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + +securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/helm-charts/tei/templates/_helpers.tpl b/helm-charts/tei/templates/_helpers.tpl new file mode 100644 index 000000000..fc4a57438 --- /dev/null +++ b/helm-charts/tei/templates/_helpers.tpl @@ -0,0 +1,69 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tei.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "tei.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tei.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Convert chart name to a string suitable as metric prefix +*/}} +{{- define "tei.metricPrefix" -}} +{{- include "tei.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tei.labels" -}} +helm.sh/chart: {{ include "tei.chart" . }} +{{ include "tei.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tei.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tei.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tei.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tei.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/tei/templates/configmap.yaml b/helm-charts/tei/templates/configmap.yaml new file mode 100644 index 000000000..6b40613a1 --- /dev/null +++ b/helm-charts/tei/templates/configmap.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "tei.fullname" . }}-config + labels: + {{- include "tei.labels" . | nindent 4 }} +data: + MODEL_ID: {{ .Values.EMBEDDING_MODEL_ID | quote }} + PORT: {{ .Values.port | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }} + HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }} + {{- end }} + {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }} + HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }} + {{- end }} + # More options for HPU + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }} + {{- end }} + {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }} + MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }} + {{- end }} diff --git a/helm-charts/tei/templates/deployment.yaml b/helm-charts/tei/templates/deployment.yaml new file mode 100644 index 000000000..442847df5 --- /dev/null +++ b/helm-charts/tei/templates/deployment.yaml @@ -0,0 +1,122 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 
+kind: Deployment +metadata: + name: {{ include "tei.fullname" . }} + labels: + {{- include "tei.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + # remove if replica count should not be reset on pod update with HPA + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "tei.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "tei.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "tei.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: {{ .Values.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: 
model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 60 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "tei.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml b/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 000000000..9b76e6ad4 --- /dev/null +++ b/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,60 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if and .Values.global.monitoring .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "tei.fullname" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "tei.fullname" . 
}} + minReplicas: 1 + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + - type: Object + object: + describedObject: + apiVersion: v1 + # get metric for named object of given type (in same namespace) + kind: Service + name: {{ include "tei.fullname" . }} + target: +{{- if .Values.accelDevice }} + # Metric is sum from all pods. "AverageValue" divides value returned from + # the custom metrics API by the number of Pods before comparing to the target: + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics + type: AverageValue + averageValue: 15 + metric: + name: {{ include "tei.metricPrefix" . }}_queue_size_sum +{{- else }} + # Metric is average for all the pods. To avoid replica fluctuation when pod + # startup + request processing takes longer than HPA evaluation period, this uses + # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type. + type: Value + value: 4 # seconds + metric: + name: {{ include "tei.metricPrefix" . }}_request_latency +{{- end }} + behavior: + scaleDown: + stabilizationWindowSeconds: 180 + policies: + - type: Percent + value: 25 + periodSeconds: 15 + scaleUp: + selectPolicy: Max + stabilizationWindowSeconds: 0 + policies: + - type: Percent + value: 50 + periodSeconds: 15 + - type: Pods + value: 2 + periodSeconds: 15 +{{- end }} diff --git a/helm-charts/tei/templates/service.yaml b/helm-charts/tei/templates/service.yaml new file mode 100644 index 000000000..a12a01dec --- /dev/null +++ b/helm-charts/tei/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "tei.fullname" . }} + labels: + {{- include "tei.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: {{ .Values.port }} + protocol: TCP + name: tei + selector: + {{- include "tei.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/tei/templates/servicemonitor.yaml b/helm-charts/tei/templates/servicemonitor.yaml new file mode 100644 index 000000000..96743442b --- /dev/null +++ b/helm-charts/tei/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "tei.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "tei.selectorLabels" . | nindent 6 }} + endpoints: + - port: tei + interval: 5s +{{- end }} diff --git a/helm-charts/tei/templates/tests/test-pod.yaml b/helm-charts/tei/templates/tests/test-pod.yaml new file mode 100644 index 000000000..700469beb --- /dev/null +++ b/helm-charts/tei/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "tei.fullname" . }}-testpod" + labels: + {{- include "tei.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "tei.fullname" . }}/embed -sS --fail-with-body \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/tei/values.yaml b/helm-charts/tei/values.yaml new file mode 100644 index 000000000..44460a359 --- /dev/null +++ b/helm-charts/tei/values.yaml @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tei. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +# Enabling HPA will: +# - Ignore above replica count, as it will be controlled by HPA +# - Add example HPA scaling rules with thresholds suitable for Xeon deployments +# - Require custom metrics ConfigMap available in the main application chart +autoscaling: + maxReplicas: 2 + enabled: false + +port: 2081 +shmSize: 1Gi +EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5" +HF_HUB_DISABLE_PROGRESS_BARS: "1" +HF_HUB_ENABLE_HF_TRANSFER: "0" + +image: + repository: ghcr.io/huggingface/text-embeddings-inference + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "cpu-1.5" + +# empty for CPU +accelDevice: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 + timeoutSeconds: 2 +readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 +startupProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following vars; the behavior is undefined if both are set. + # By default, both vars are set to empty, and the model will be downloaded and saved to a tmp volume. + modelUseHostPath: "" + modelUsePVC: "" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/teirerank/.helmignore b/helm-charts/teirerank/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/teirerank/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/teirerank/Chart.yaml b/helm-charts/teirerank/Chart.yaml new file mode 100644 index 000000000..cc9c015f9 --- /dev/null +++ b/helm-charts/teirerank/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: teirerank +description: The Helm chart for HuggingFace Text Embedding Inference Server +type: application +version: 0-latest +# The HF TEI version +appVersion: "cpu-1.5" diff --git a/helm-charts/teirerank/README.md b/helm-charts/teirerank/README.md new file mode 100644 index 000000000..79202497d --- /dev/null +++ b/helm-charts/teirerank/README.md @@ -0,0 +1,48 @@ +# teirerank + +Helm chart for deploying the Hugging Face Text Embeddings Inference (TEI) reranking service. + +## Installing the Chart + +To install the chart, run the following: + +```console +cd ${GenAIInfra_repo}/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME="BAAI/bge-reranker-base" +helm install teirerank teirerank --set global.modelUseHostPath=${MODELDIR} --set RERANK_MODEL_ID=${MODELNAME} +``` + +By default, the teirerank service will download the "BAAI/bge-reranker-base" model, which is about 1.1GB. + +If you have already cached the model locally, you can pass it to the container as in this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/BAAI/bge-reranker-base" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/teirerank 2082:80` to expose the teirerank service for access. 
+ +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:2082/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| RERANK_MODEL_ID | string | `"BAAI/bge-reranker-base"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | | +| image.tag | string | `"cpu-1.5"` | | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/teirerank/ci-values.yaml b/helm-charts/teirerank/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/teirerank/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/teirerank/gaudi-values.yaml b/helm-charts/teirerank/gaudi-values.yaml new file mode 100644 index 000000000..a5d6c1b14 --- /dev/null +++ b/helm-charts/teirerank/gaudi-values.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for teirerank. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +accelDevice: "gaudi" + +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +MAX_WARMUP_SEQUENCE_LENGTH: "512" +image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + +securityContext: + readOnlyRootFilesystem: false + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + timeoutSeconds: 1 +readinessProbe: + timeoutSeconds: 1 diff --git a/helm-charts/teirerank/templates/_helpers.tpl b/helm-charts/teirerank/templates/_helpers.tpl new file mode 100644 index 000000000..0c0b9238f --- /dev/null +++ b/helm-charts/teirerank/templates/_helpers.tpl @@ -0,0 +1,69 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "teirerank.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "teirerank.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "teirerank.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Convert chart name to a string suitable as metric prefix +*/}} +{{- define "teirerank.metricPrefix" -}} +{{- include "teirerank.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "teirerank.labels" -}} +helm.sh/chart: {{ include "teirerank.chart" . }} +{{ include "teirerank.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "teirerank.selectorLabels" -}} +app.kubernetes.io/name: {{ include "teirerank.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "teirerank.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "teirerank.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/teirerank/templates/configmap.yaml b/helm-charts/teirerank/templates/configmap.yaml new file mode 100644 index 000000000..f8c25bc0b --- /dev/null +++ b/helm-charts/teirerank/templates/configmap.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "teirerank.fullname" . }}-config + labels: + {{- include "teirerank.labels" . | nindent 4 }} +data: + MODEL_ID: {{ .Values.RERANK_MODEL_ID | quote }} + PORT: {{ .Values.port | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + NUMBA_CACHE_DIR: "/tmp" + TRANSFORMERS_CACHE: "/tmp/transformers_cache" + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }} + HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }} + {{- end }} + {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }} + HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }} + {{- end }} + # More options for HPU + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }} + {{- end }} + {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }} + MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }} + {{- end }} diff --git a/helm-charts/teirerank/templates/deployment.yaml b/helm-charts/teirerank/templates/deployment.yaml new file mode 100644 index 000000000..2d0ec9992 --- /dev/null +++ b/helm-charts/teirerank/templates/deployment.yaml @@ -0,0 +1,122 @@ +# Copyright (C) 2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "teirerank.fullname" . }} + labels: + {{- include "teirerank.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + # remove if replica count should not be reset on pod update with HPA + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "teirerank.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "teirerank.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "teirerank.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + args: + - "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: {{ .Values.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} 
+ resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 60 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "teirerank.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml b/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 000000000..b8a881934 --- /dev/null +++ b/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,60 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if and .Values.global.monitoring .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "teirerank.fullname" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "teirerank.fullname" . 
}} + minReplicas: 1 + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + - type: Object + object: + describedObject: + apiVersion: v1 + # get metric for named object of given type (in same namespace) + kind: Service + name: {{ include "teirerank.fullname" . }} + target: +{{- if .Values.accelDevice }} + # Metric is sum from all pods. "AverageValue" divides value returned from + # the custom metrics API by the number of Pods before comparing to the target: + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics + type: AverageValue + averageValue: 15 + metric: + name: {{ include "teirerank.metricPrefix" . }}_queue_size_sum +{{- else }} + # Metric is average for all the pods. To avoid replica fluctuation when pod + # startup + request processing takes longer than HPA evaluation period, this uses + # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type. + type: Value + value: 4 # seconds + metric: + name: {{ include "teirerank.metricPrefix" . }}_request_latency +{{- end }} + behavior: + scaleDown: + stabilizationWindowSeconds: 180 + policies: + - type: Percent + value: 25 + periodSeconds: 15 + scaleUp: + selectPolicy: Max + stabilizationWindowSeconds: 0 + policies: + - type: Percent + value: 50 + periodSeconds: 15 + - type: Pods + value: 2 + periodSeconds: 15 +{{- end }} diff --git a/helm-charts/teirerank/templates/service.yaml b/helm-charts/teirerank/templates/service.yaml new file mode 100644 index 000000000..a3616b6b0 --- /dev/null +++ b/helm-charts/teirerank/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "teirerank.fullname" . }} + labels: + {{- include "teirerank.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: {{ .Values.port }} + protocol: TCP + name: teirerank + selector: + {{- include "teirerank.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/teirerank/templates/servicemonitor.yaml b/helm-charts/teirerank/templates/servicemonitor.yaml new file mode 100644 index 000000000..13110adbe --- /dev/null +++ b/helm-charts/teirerank/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "teirerank.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "teirerank.selectorLabels" . | nindent 6 }} + endpoints: + - port: teirerank + interval: 5s +{{- end }} diff --git a/helm-charts/teirerank/templates/tests/test-pod.yaml b/helm-charts/teirerank/templates/tests/test-pod.yaml new file mode 100644 index 000000000..225fe2f58 --- /dev/null +++ b/helm-charts/teirerank/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "teirerank.fullname" . }}-testpod" + labels: + {{- include "teirerank.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test # Helm test hook: this pod is created by "helm test" + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "teirerank.fullname" . }}/rerank -sS --fail-with-body \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/teirerank/values.yaml b/helm-charts/teirerank/values.yaml new file mode 100644 index 000000000..4d0dedf29 --- /dev/null +++ b/helm-charts/teirerank/values.yaml @@ -0,0 +1,117 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for teirerank. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +# Enabling HPA will: +# - Ignore above replica count, as it will be controlled by HPA +# - Add example HPA scaling rules with thresholds suitable for Xeon deployments +# - Require custom metrics ConfigMap available in the main application chart +autoscaling: + maxReplicas: 3 + enabled: false + +port: 2082 +shmSize: 1Gi +RERANK_MODEL_ID: "BAAI/bge-reranker-base" +HF_HUB_DISABLE_PROGRESS_BARS: "1" +HF_HUB_ENABLE_HF_TRANSFER: "0" + +image: + repository: ghcr.io/huggingface/text-embeddings-inference + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "cpu-1.5" + +# empty for CPU +accelDevice: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 +startupProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following vars; the behavior is undefined if both are set. + # By default, both vars are set to empty, and the model will be downloaded and saved to a tmp volume. + modelUseHostPath: "" + modelUsePVC: "" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/tgi/.helmignore b/helm-charts/tgi/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/tgi/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/tgi/Chart.yaml b/helm-charts/tgi/Chart.yaml new file mode 100644 index 000000000..9836a198b --- /dev/null +++ b/helm-charts/tgi/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: tgi +description: The Helm chart for HuggingFace Text Generation Inference Server +type: application +version: 0-latest +# The HF TGI version +appVersion: "2.1.0" diff --git a/helm-charts/tgi/README.md b/helm-charts/tgi/README.md new file mode 100644 index 000000000..27acd96e8 --- /dev/null +++ b/helm-charts/tgi/README.md @@ -0,0 +1,52 @@ +# tgi + +Helm chart for deploying Hugging Face Text Generation Inference service. + +## Installing the Chart + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME="bigscience/bloom-560m" +export HFTOKEN="insert-your-huggingface-token-here" +helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +# To deploy on Gaudi enabled kubernetes cluster +# helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +``` + +By default, the tgi service will download the "bigscience/bloom-560m" model, which is about 1.1GB. + +If you have already cached the model locally, you can pass it to the container as in this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/models--bigscience--bloom-560m" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. 
+ +Then run the command `kubectl port-forward svc/tgi 2080:80` to expose the tgi service for access. + +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:2080/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"bigscience/bloom-560m"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| image.repository | string | `"ghcr.io/huggingface/text-generation-inference"` | | +| image.tag | string | `"1.4"` | | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/tgi/ci-gaudi-values.yaml b/helm-charts/tgi/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/tgi/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/tgi/ci-values.yaml b/helm-charts/tgi/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/tgi/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/tgi/gaudi-values.yaml b/helm-charts/tgi/gaudi-values.yaml new file mode 100644 index 000000000..9c46415cc --- /dev/null +++ b/helm-charts/tgi/gaudi-values.yaml @@ -0,0 +1,39 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +accelDevice: "gaudi" + +image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.6" + +MAX_INPUT_LENGTH: "1024" +MAX_TOTAL_TOKENS: "2048" +CUDA_GRAPHS: "" +OMPI_MCA_btl_vader_single_copy_mechanism: "none" +ENABLE_HPU_GRAPH: "true" +LIMIT_HPU_GRAPH: "true" +USE_FLASH_ATTENTION: "true" +FLASH_ATTENTION_RECOMPUTE: "true" + +resources: + limits: + habana.ai/gaudi: 1 + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/tgi/nv-values.yaml b/helm-charts/tgi/nv-values.yaml new file mode 100644 index 000000000..d073ffb9c --- /dev/null +++ b/helm-charts/tgi/nv-values.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +accelDevice: "nvidia" + +image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "2.2.0" + +resources: + limits: + nvidia.com/gpu: 1 + +CUDA_GRAPHS: "" + +livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/tgi/templates/_helpers.tpl b/helm-charts/tgi/templates/_helpers.tpl new file mode 100644 index 000000000..b672e8309 --- /dev/null +++ b/helm-charts/tgi/templates/_helpers.tpl @@ -0,0 +1,69 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tgi.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "tgi.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tgi.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Convert chart name to a string suitable as metric prefix +*/}} +{{- define "tgi.metricPrefix" -}} +{{- include "tgi.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tgi.labels" -}} +helm.sh/chart: {{ include "tgi.chart" . 
}} +{{ include "tgi.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tgi.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tgi.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tgi.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tgi.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/tgi/templates/configmap.yaml b/helm-charts/tgi/templates/configmap.yaml new file mode 100644 index 000000000..82be971e4 --- /dev/null +++ b/helm-charts/tgi/templates/configmap.yaml @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "tgi.fullname" . }}-config + labels: + {{- include "tgi.labels" . 
| nindent 4 }} +data: + MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }} + PORT: {{ .Values.port | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- if contains "tgi-gaudi" .Values.image.repository }} + HABANA_LOGS: "/tmp/habana_logs" + {{- end }} + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.MAX_INPUT_LENGTH }} + MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }} + {{- end }} + {{- if .Values.MAX_TOTAL_TOKENS }} + MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }} + {{- end }} + {{- if .Values.CUDA_GRAPHS }} + CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }} + {{- end }} + {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }} + HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }} + {{- end }} + {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }} + HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }} + {{- end }} + # More options for HPU + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }} + {{- end }} + {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }} + PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }} + {{- end }} + {{- if .Values.ENABLE_HPU_GRAPH }} + ENABLE_HPU_GRAPH: {{ .Values.ENABLE_HPU_GRAPH | quote }} + {{- end }} + {{- if .Values.LIMIT_HPU_GRAPH }} + LIMIT_HPU_GRAPH: {{ .Values.LIMIT_HPU_GRAPH | quote }} + {{- end }} + {{- if .Values.USE_FLASH_ATTENTION }} + USE_FLASH_ATTENTION: {{ .Values.USE_FLASH_ATTENTION | quote }} + {{- end }} + {{- if .Values.FLASH_ATTENTION_RECOMPUTE }} + FLASH_ATTENTION_RECOMPUTE: {{ .Values.FLASH_ATTENTION_RECOMPUTE 
| quote }} + {{- end }} diff --git a/helm-charts/tgi/templates/deployment.yaml b/helm-charts/tgi/templates/deployment.yaml new file mode 100644 index 000000000..04436e613 --- /dev/null +++ b/helm-charts/tgi/templates/deployment.yaml @@ -0,0 +1,126 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + # remove if replica count should not be reset on pod update with HPA + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "tgi.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "tgi.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "tgi.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + {{- if .Values.extraCmdArgs }} + args: + {{- range .Values.extraCmdArgs }} + - {{ . 
| quote }} + {{- end }} + {{- end }} + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: {{ .Values.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 120 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "tgi.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml b/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 000000000..f0338e48d --- /dev/null +++ b/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if and .Values.global.monitoring .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "tgi.fullname" . }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "tgi.fullname" . }} + minReplicas: 1 + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + - type: Object + object: + describedObject: + apiVersion: v1 + # get metric for named object of given type (in same namespace) + kind: Service + name: {{ include "tgi.fullname" . }} + target: +{{- if .Values.accelDevice }} + # Metric is sum from all pods. "AverageValue" divides value returned from + # the custom metrics API by the number of Pods before comparing to the target: + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details + # https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics + type: AverageValue + averageValue: 15 + metric: + name: {{ include "tgi.metricPrefix" . }}_queue_size_sum +{{- else }} + # Metric is average for all the pods. To avoid replica fluctuation when pod + # startup + request processing takes longer than HPA evaluation period, this uses + # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type. + type: Value + value: 4 # seconds + metric: + name: {{ include "tgi.metricPrefix" . 
}}_request_latency +{{- end }} + behavior: + scaleDown: + stabilizationWindowSeconds: 180 + policies: + - type: Percent + value: 25 + periodSeconds: 90 + scaleUp: + selectPolicy: Max + stabilizationWindowSeconds: 0 + policies: + # Slow linear rampup in case additional CPU pods go to same node + # (i.e. interfere with each other) + - type: Pods + value: 1 + periodSeconds: 90 + #- type: Percent + # value: 25 + # periodSeconds: 90 +{{- end }} diff --git a/helm-charts/tgi/templates/service.yaml b/helm-charts/tgi/templates/service.yaml new file mode 100644 index 000000000..011cc37ec --- /dev/null +++ b/helm-charts/tgi/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "tgi.fullname" . }} + labels: + {{- include "tgi.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: {{ .Values.port }} + protocol: TCP + name: tgi + selector: + {{- include "tgi.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/tgi/templates/servicemonitor.yaml b/helm-charts/tgi/templates/servicemonitor.yaml new file mode 100644 index 000000000..978174226 --- /dev/null +++ b/helm-charts/tgi/templates/servicemonitor.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +# Dashboard for the exposed TGI metrics: +# - https://grafana.com/grafana/dashboards/19831-text-generation-inference-dashboard/ +# Metric descriptions: +# - https://github.com/huggingface/text-generation-inference/discussions/1127#discussioncomment-7240527 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "tgi.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "tgi.selectorLabels" . 
| nindent 6 }} + endpoints: + - port: tgi + interval: 5s +{{- end }} diff --git a/helm-charts/tgi/templates/tests/test-pod.yaml b/helm-charts/tgi/templates/tests/test-pod.yaml new file mode 100644 index 000000000..948f23860 --- /dev/null +++ b/helm-charts/tgi/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "tgi.fullname" . }}-testpod" + labels: + {{- include "tgi.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "tgi.fullname" . }}/generate -sS --fail-with-body \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/tgi/values.yaml b/helm-charts/tgi/values.yaml new file mode 100644 index 000000000..d58db193a --- /dev/null +++ b/helm-charts/tgi/values.yaml @@ -0,0 +1,147 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tgi. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +replicaCount: 1 + +# Enabling HPA will: +# - Ignore above replica count, as it will be controlled by HPA +# - Add example HPA scaling rules with thresholds suitable for Xeon deployments +# - Require custom metrics ConfigMap available in the main application chart +autoscaling: + maxReplicas: 4 + enabled: false + +port: 2080 +shmSize: 1Gi + +# Set extraCmdArgs if you need to pass additional parameters to TGI for performance +# Refer to https://huggingface.co/docs/text-generation-inference/en/reference/launcher for more options. +# extraCmdArgs: ["--dtype","bfloat16"] + +image: + repository: ghcr.io/huggingface/text-generation-inference + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "2.4.0-intel-cpu" + +# empty for CPU +accelDevice: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# Use TCP probe instead of HTTP due to bug #483 +# https://github.com/opea-project/GenAIExamples/issues/483 +livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 + failureThreshold: 24 +readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: 16 + periodSeconds: 8 + timeoutSeconds: 4 +startupProbe: + tcpSocket: + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 180 + timeoutSeconds: 2 +# livenessProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +# failureThreshold: 24 +# readinessProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +# startupProbe: +# httpGet: +# path: /health +# port: http +# initialDelaySeconds: 5 +# periodSeconds: 5 +# failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + +MAX_INPUT_LENGTH: "" +MAX_TOTAL_TOKENS: "" +CUDA_GRAPHS: "0" +HF_HUB_DISABLE_PROGRESS_BARS: "1" +HF_HUB_ENABLE_HF_TRANSFER: "0" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following vars; the behavior is not defined if both are set. + # By default, both vars are set to empty, and the model will be downloaded and saved to a tmp volume. 
+ modelUseHostPath: "" + modelUsePVC: "" + + # Install Prometheus serviceMonitor for service + monitoring: false + + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/tts/.helmignore b/helm-charts/tts/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/tts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/tts/Chart.yaml b/helm-charts/tts/Chart.yaml new file mode 100644 index 000000000..3d0e1462d --- /dev/null +++ b/helm-charts/tts/Chart.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: tts +description: The Helm chart for deploying tts as microservice +type: application +version: 0-latest +# The tts microservice server version +appVersion: "v1.0" + +dependencies: + - name: speecht5 + version: 0-latest + repository: file://../speecht5 + condition: speecht5.enabled diff --git a/helm-charts/tts/README.md b/helm-charts/tts/README.md new file mode 100644 index 000000000..24f3c6cfd --- /dev/null +++ b/helm-charts/tts/README.md @@ -0,0 +1,51 @@ +# tts + +Helm chart for deploying tts microservice. + +tts depends on speecht5; you should set TTS_ENDPOINT to the speecht5 endpoint before starting. + +## (Option1): Installing the chart separately + +First, you need to install the speecht5 chart, please refer to the [speecht5](../speecht5) chart for more information. + +After you've deployed the speecht5 chart successfully, please run `kubectl get svc` to get the speecht5 service endpoint, i.e. `http://speecht5:7055`. 
+ +To install the tts chart, run the following: + +```console +cd GenAIInfra/helm-charts/tts +export TTS_ENDPOINT="http://speecht5:7055" +helm dependency update +helm install tts . --set TTS_ENDPOINT=${TTS_ENDPOINT} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/tts +helm dependency update +helm install tts . --set speecht5.enabled=true +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/tts 9088:9088` to expose the tts service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:9088/v1/audio/speech \ + -XPOST \ + -d '{"text": "Who are you?"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ------------ | ----------- | +| image.repository | string | `"opea/tts"` | | +| service.port | string | `"9088"` | | +| TTS_ENDPOINT | string | `""` | | diff --git a/helm-charts/tts/ci-values.yaml b/helm-charts/tts/ci-values.yaml new file mode 100644 index 000000000..8eda0bf5b --- /dev/null +++ b/helm-charts/tts/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tts. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +speecht5: + enabled: true diff --git a/helm-charts/tts/templates/_helpers.tpl b/helm-charts/tts/templates/_helpers.tpl new file mode 100644 index 000000000..22b5e576a --- /dev/null +++ b/helm-charts/tts/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tts.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "tts.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tts.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tts.labels" -}} +helm.sh/chart: {{ include "tts.chart" . }} +{{ include "tts.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tts.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tts.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "tts.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "tts.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/tts/templates/configmap.yaml b/helm-charts/tts/templates/configmap.yaml new file mode 100644 index 000000000..96fd677e7 --- /dev/null +++ b/helm-charts/tts/templates/configmap.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "tts.fullname" . 
}}-config + labels: + {{- include "tts.labels" . | nindent 4 }} +data: + {{- if .Values.TTS_ENDPOINT }} + TTS_ENDPOINT: {{ .Values.TTS_ENDPOINT | quote}} + {{- else }} + TTS_ENDPOINT: "http://{{ .Release.Name }}-speecht5:7055" + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.TTS_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-speecht5,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/tts/templates/deployment.yaml b/helm-charts/tts/templates/deployment.yaml new file mode 100644 index 000000000..26da197a1 --- /dev/null +++ b/helm-charts/tts/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "tts.fullname" . }} + labels: + {{- include "tts.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "tts.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "tts.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "tts.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: tts + containerPort: 9088 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "tts.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/tts/templates/service.yaml b/helm-charts/tts/templates/service.yaml new file mode 100644 index 000000000..f2d261e6a --- /dev/null +++ b/helm-charts/tts/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "tts.fullname" . }} + labels: + {{- include "tts.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 9088 + protocol: TCP + name: tts + selector: + {{- include "tts.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/tts/templates/tests/test-pod.yaml b/helm-charts/tts/templates/tests/test-pod.yaml new file mode 100644 index 000000000..26fcbcca3 --- /dev/null +++ b/helm-charts/tts/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "tts.fullname" . }}-testpod" + labels: + {{- include "tts.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "tts.fullname" . }}:{{ .Values.service.port }}/v1/audio/speech \ + -XPOST \ + -d '{"text": "Who are you?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/tts/values.yaml b/helm-charts/tts/values.yaml new file mode 100644 index 000000000..39214480c --- /dev/null +++ b/helm-charts/tts/values.yaml @@ -0,0 +1,93 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for tts. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +speecht5: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. 
+LOGFLAG: "" + +TTS_ENDPOINT: "" + +image: + repository: opea/tts + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for tts service is 9088 + port: 9088 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: v1/health_check + port: tts + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: v1/health_check + port: tts + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: tts + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/ui/.helmignore b/helm-charts/ui/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/ui/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). 
Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/ui/Chart.yaml b/helm-charts/ui/Chart.yaml new file mode 100644 index 000000000..72b9b43fe --- /dev/null +++ b/helm-charts/ui/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: ui +description: Common Helm chart for the UI for various opea workload +type: application +version: 0-latest +appVersion: "v1.0" diff --git a/helm-charts/ui/ci-values.yaml b/helm-charts/ui/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/ui/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml b/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml new file mode 120000 index 000000000..83ad22951 --- /dev/null +++ b/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml @@ -0,0 +1 @@ +variant_chatqna_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_codegen_react-values.yaml b/helm-charts/ui/ci-variant_codegen_react-values.yaml new file mode 120000 index 000000000..dca8edda6 --- /dev/null +++ b/helm-charts/ui/ci-variant_codegen_react-values.yaml @@ -0,0 +1 @@ +variant_codegen_react-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_codegen_svelte-values.yaml b/helm-charts/ui/ci-variant_codegen_svelte-values.yaml new file mode 120000 index 000000000..92b8a5e59 --- /dev/null +++ b/helm-charts/ui/ci-variant_codegen_svelte-values.yaml @@ -0,0 +1 @@ +variant_codegen_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml b/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml new file mode 120000 index 000000000..8034cb024 
--- /dev/null +++ b/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml @@ -0,0 +1 @@ +variant_codetrans_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_docsum_react-values.yaml b/helm-charts/ui/ci-variant_docsum_react-values.yaml new file mode 120000 index 000000000..a06c0a483 --- /dev/null +++ b/helm-charts/ui/ci-variant_docsum_react-values.yaml @@ -0,0 +1 @@ +variant_docsum_react-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_docsum_svelte-values.yaml b/helm-charts/ui/ci-variant_docsum_svelte-values.yaml new file mode 120000 index 000000000..c0a3e63ff --- /dev/null +++ b/helm-charts/ui/ci-variant_docsum_svelte-values.yaml @@ -0,0 +1 @@ +variant_docsum_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_faqgen_react-values.yaml b/helm-charts/ui/ci-variant_faqgen_react-values.yaml new file mode 120000 index 000000000..29846e96d --- /dev/null +++ b/helm-charts/ui/ci-variant_faqgen_react-values.yaml @@ -0,0 +1 @@ +variant_faqgen_react-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml b/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml new file mode 120000 index 000000000..f1976d3a9 --- /dev/null +++ b/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml @@ -0,0 +1 @@ +variant_faqgen_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml b/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml new file mode 120000 index 000000000..b14d59afd --- /dev/null +++ b/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml @@ -0,0 +1 @@ +variant_visualqna_svelte-values.yaml \ No newline at end of file diff --git a/helm-charts/ui/templates/_helpers.tpl b/helm-charts/ui/templates/_helpers.tpl new file mode 100644 index 000000000..07e404849 --- /dev/null +++ b/helm-charts/ui/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "ui.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ui.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ui.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ui.labels" -}} +helm.sh/chart: {{ include "ui.chart" . }} +{{ include "ui.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ui.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ui.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ui.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ui.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/ui/templates/configmap.yaml b/helm-charts/ui/templates/configmap.yaml new file mode 100644 index 000000000..af6cf6fde --- /dev/null +++ b/helm-charts/ui/templates/configmap.yaml @@ -0,0 +1,42 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "ui.fullname" . }}-config + labels: + {{- include "ui.labels" . | nindent 4 }} +data: + {{- if contains "codegen-react-ui" .Values.image.repository }} + VITE_CODE_GEN_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "codegen-ui" .Values.image.repository }} + BASIC_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "codetrans-ui" .Values.image.repository }} + BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if (contains "docsum-ui" .Values.image.repository) }} + DOC_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + BACKEND_SERVICE_ENDPOINT: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if (contains "docsum-react-ui" .Values.image.repository) }} + VITE_DOC_SUM_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "chatqna-ui" .Values.image.repository }} + CHAT_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + UPLOAD_FILE_BASE_URL: {{ .Values.DATAPREP_SERVICE_ENDPOINT | quote }} + GET_FILE: {{ .Values.DATAPREP_GET_FILE_ENDPOINT | quote }} + DELETE_FILE: {{ .Values.DATAPREP_DELETE_FILE_ENDPOINT | quote }} + {{- else if contains "chatqna-conversation-ui" .Values.image.repository}} + APP_BACKEND_SERVICE_ENDPOINT: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + APP_DATA_PREP_SERVICE_URL: {{ .Values.DATAPREP_SERVICE_ENDPOINT | quote }} + APP_DATA_PREP_GET_FILE_URL: {{ .Values.DATAPREP_GET_FILE_ENDPOINT | quote }} + APP_DATA_PREP_DELETE_FILE_URL: {{ 
.Values.DATAPREP_DELETE_FILE_ENDPOINT | quote }} + {{- else if contains "visualqna-ui" .Values.image.repository }} + BACKEND_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "audioqna-ui" .Values.image.repository }} + CHAT_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "faqgen-react-ui" .Values.image.repository }} + VITE_FAQ_GEN_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else if contains "faqgen-ui" .Values.image.repository }} + FAQ_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }} + {{- else }} + {{- fail (printf "Unsupported ui image: %s" .Values.image.repository) }} + {{- end }} diff --git a/helm-charts/ui/templates/deployment.yaml b/helm-charts/ui/templates/deployment.yaml new file mode 100644 index 000000000..5bf8231bb --- /dev/null +++ b/helm-charts/ui/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ui.fullname" . }} + labels: + {{- include "ui.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "ui.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "ui.labels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "ui.fullname" .
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: ui + containerPort: {{ .Values.containerPort }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - mountPath: /tmp + name: tmp + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "ui.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/ui/templates/service.yaml b/helm-charts/ui/templates/service.yaml new file mode 100644 index 000000000..0757f090a --- /dev/null +++ b/helm-charts/ui/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ui.fullname" . }} + labels: + {{- include "ui.labels" .
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: ui + protocol: TCP + name: ui + selector: + {{- include "ui.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/ui/templates/tests/test-pod.yaml b/helm-charts/ui/templates/tests/test-pod.yaml new file mode 100644 index 000000000..5c320d599 --- /dev/null +++ b/helm-charts/ui/templates/tests/test-pod.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "ui.fullname" . }}-testpod" + labels: + {{- include "ui.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "ui.fullname" . }}:{{ .Values.service.port }} -sS --fail-with-body -X GET && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/ui/values.yaml b/helm-charts/ui/values.yaml new file mode 100644 index 000000000..4e0beddf4 --- /dev/null +++ b/helm-charts/ui/values.yaml @@ -0,0 +1,99 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for conversational-ui. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: opea/chatqna-conversation-ui + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # Currently comment the securityContext settings + # as the inappropriate settings within Conversational UI (need root privilege) + # issue: https://github.com/opea-project/GenAIExamples/issues/517 + # readOnlyRootFilesystem: true + # allowPrivilegeEscalation: false + # runAsNonRoot: true + # runAsUser: 1000 + # capabilities: + # drop: + # - ALL + # seccompProfile: + # type: RuntimeDefault + +# internal container port +# conventional ui container port: 5173, conversational ui container port: 80 +containerPort: 80 + +service: + type: ClusterIP + port: 5174 + +livenessProbe: + httpGet: + path: / + port: ui + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: / + port: ui + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: / + port: ui + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# chatQnA backend service URL, default to Mega backend service +BACKEND_SERVICE_ENDPOINT: "/v1/chatqna" + +# data preparation service URL, default to Mega data preparation service +DATAPREP_SERVICE_ENDPOINT: "/v1/dataprep" + +# data preparation get file service URL, default to Mega data preparation service +DATAPREP_GET_FILE_ENDPOINT: "/v1/dataprep/get_file" + +# data preparation delete file service URL, default to Mega data preparation service +DATAPREP_DELETE_FILE_ENDPOINT: "/v1/dataprep/delete_file" + +global: {} diff --git a/helm-charts/ui/variant_chatqna_svelte-values.yaml b/helm-charts/ui/variant_chatqna_svelte-values.yaml new file mode 100644 index 000000000..ab688a5ab --- /dev/null +++ b/helm-charts/ui/variant_chatqna_svelte-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/chatqna-ui + tag: "latest" + +containerPort: 5173 diff --git a/helm-charts/ui/variant_codegen_react-values.yaml b/helm-charts/ui/variant_codegen_react-values.yaml new file mode 100644 index 000000000..37a2ee2b0 --- /dev/null +++ b/helm-charts/ui/variant_codegen_react-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/codegen-react-ui + tag: "latest" + +BACKEND_SERVICE_ENDPOINT: "/v1/codegen" diff --git a/helm-charts/ui/variant_codegen_svelte-values.yaml b/helm-charts/ui/variant_codegen_svelte-values.yaml new file mode 100644 index 000000000..8ec0519eb --- /dev/null +++ b/helm-charts/ui/variant_codegen_svelte-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/codegen-ui + tag: "latest" + +containerPort: 5173 + +BACKEND_SERVICE_ENDPOINT: "/v1/codegen" diff --git 
a/helm-charts/ui/variant_codetrans_svelte-values.yaml b/helm-charts/ui/variant_codetrans_svelte-values.yaml new file mode 100644 index 000000000..fe943ae08 --- /dev/null +++ b/helm-charts/ui/variant_codetrans_svelte-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/codetrans-ui + tag: "latest" + +containerPort: 5173 + +BACKEND_SERVICE_ENDPOINT: "/v1/codetrans" diff --git a/helm-charts/ui/variant_docsum_react-values.yaml b/helm-charts/ui/variant_docsum_react-values.yaml new file mode 100644 index 000000000..a28b9acb7 --- /dev/null +++ b/helm-charts/ui/variant_docsum_react-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/docsum-react-ui + tag: "latest" + +BACKEND_SERVICE_ENDPOINT: "/v1/docsum" diff --git a/helm-charts/ui/variant_docsum_svelte-values.yaml b/helm-charts/ui/variant_docsum_svelte-values.yaml new file mode 100644 index 000000000..6c9e215f4 --- /dev/null +++ b/helm-charts/ui/variant_docsum_svelte-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/docsum-ui + tag: "latest" + +containerPort: 5173 + +BACKEND_SERVICE_ENDPOINT: "/v1/docsum" diff --git a/helm-charts/ui/variant_faqgen_react-values.yaml b/helm-charts/ui/variant_faqgen_react-values.yaml new file mode 100644 index 000000000..4af8aa97e --- /dev/null +++ b/helm-charts/ui/variant_faqgen_react-values.yaml @@ -0,0 +1,8 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/faqgen-react-ui + tag: "latest" + +BACKEND_SERVICE_ENDPOINT: "/v1/faqgen" diff --git a/helm-charts/ui/variant_faqgen_svelte-values.yaml b/helm-charts/ui/variant_faqgen_svelte-values.yaml new file mode 100644 index 000000000..846cee1ad --- /dev/null +++ b/helm-charts/ui/variant_faqgen_svelte-values.yaml @@ -0,0 +1,10 @@ +# 
Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/faqgen-ui + tag: "latest" + +containerPort: 5173 + +BACKEND_SERVICE_ENDPOINT: "/v1/faqgen" diff --git a/helm-charts/ui/variant_visualqna_svelte-values.yaml b/helm-charts/ui/variant_visualqna_svelte-values.yaml new file mode 100644 index 000000000..2b1aab1d2 --- /dev/null +++ b/helm-charts/ui/variant_visualqna_svelte-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/visualqna-ui + tag: "latest" + +containerPort: 5173 + +BACKEND_SERVICE_ENDPOINT: "/v1/visualqna" diff --git a/helm-charts/vllm/.helmignore b/helm-charts/vllm/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/vllm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/vllm/Chart.yaml b/helm-charts/vllm/Chart.yaml new file mode 100644 index 000000000..47dacc70b --- /dev/null +++ b/helm-charts/vllm/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: vllm +description: The Helm chart for vLLM Inference Server +type: application +version: 0-latest +# The vLLM version +appVersion: "0.5" diff --git a/helm-charts/vllm/README.md b/helm-charts/vllm/README.md new file mode 100644 index 000000000..0235a7443 --- /dev/null +++ b/helm-charts/vllm/README.md @@ -0,0 +1,53 @@ +# vllm + +Helm chart for deploying vLLM Inference service. + +Refer to [Deploy with Helm Charts](../../README.md) for global guides. 
+ +## Installing the Chart + +To install the chart, run the following: + +Note that you cannot use vllm as the service release name due to [environment variables conflict](https://docs.vllm.ai/en/stable/serving/env_vars.html#environment-variables). + +```console +cd GenAIInfra/helm-charts/common +export MODELDIR=/mnt/opea-models +export MODELNAME="Intel/neural-chat-7b-v3-3" +export HFTOKEN="insert-your-huggingface-token-here" +helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +# To deploy on Gaudi enabled kubernetes cluster +# helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +``` + +By default, the vllm service will download the "Intel/neural-chat-7b-v3-3" model. + +If you already cached the model locally, you can pass it to the container like this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="facebook/opt-125m" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/myvllm 2080:80` to expose the vllm service for access.
+ +Open another terminal and run the following command to verify the service if working: + +```console +curl http://localhost:2080/v1/completions \ + -H "Content-Type: application/json" \ + -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, vllm will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. 
| +| image.repository | string | `"opea/vllm"` | | +| image.tag | string | `"latest"` | | diff --git a/helm-charts/vllm/ci-gaudi-values.yaml b/helm-charts/vllm/ci-gaudi-values.yaml new file mode 120000 index 000000000..7243d31b2 --- /dev/null +++ b/helm-charts/vllm/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/vllm/ci-values.yaml b/helm-charts/vllm/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/vllm/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/vllm/gaudi-values.yaml b/helm-charts/vllm/gaudi-values.yaml new file mode 100644 index 000000000..65e622044 --- /dev/null +++ b/helm-charts/vllm/gaudi-values.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for vllm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + repository: opea/vllm-gaudi + tag: "latest" + +# VLLM_CPU_KVCACHE_SPACE: "40" +OMPI_MCA_btl_vader_single_copy_mechanism: none +extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"] +# Workaround for current HPU image with start command /bin/bash +# extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"] +resources: + limits: + habana.ai/gaudi: 1 diff --git a/helm-charts/vllm/templates/_helpers.tpl b/helm-charts/vllm/templates/_helpers.tpl new file mode 100644 index 000000000..41b1f8d66 --- /dev/null +++ b/helm-charts/vllm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "vllm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "vllm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "vllm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "vllm.labels" -}} +helm.sh/chart: {{ include "vllm.chart" . }} +{{ include "vllm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "vllm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "vllm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "vllm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "vllm.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/vllm/templates/configmap.yaml b/helm-charts/vllm/templates/configmap.yaml new file mode 100644 index 000000000..14a8ba240 --- /dev/null +++ b/helm-charts/vllm/templates/configmap.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "vllm.fullname" . }}-config + labels: + {{- include "vllm.labels" . | nindent 4 }} +data: + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- if contains "opea/vllm-gaudi" .Values.image.repository }} + HABANA_LOGS: "/tmp/habana_logs" + {{- end }} + NUMBA_CACHE_DIR: "/tmp" + HF_HOME: "/tmp/.cache/huggingface" + # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29 + OUTLINES_CACHE_DIR: "/tmp/.cache/outlines" + {{- if .Values.VLLM_CPU_KVCACHE_SPACE }} + VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}} + {{- end }} + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}} + {{- end }} diff --git a/helm-charts/vllm/templates/deployment.yaml b/helm-charts/vllm/templates/deployment.yaml new file mode 100644 index 000000000..66b5e3722 --- /dev/null +++ b/helm-charts/vllm/templates/deployment.yaml @@ -0,0 +1,127 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "vllm.fullname" . }} + labels: + {{- include "vllm.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "vllm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "vllm.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "vllm.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + args: + {{- if .Values.extraCmdArgs }} + {{- range .Values.extraCmdArgs }} + - {{ . 
| quote }} + {{- end }} + {{- end }} + - "--model" + - {{ .Values.LLM_MODEL_ID | quote }} + - "--host" + - "0.0.0.0" + - "--port" + - {{ .Values.port | quote }} + - "--download-dir" + - "/data" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + - mountPath: /tmp + name: tmp + ports: + - name: http + containerPort: {{ .Values.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: shm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.shmSize }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "vllm.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/vllm/templates/service.yaml b/helm-charts/vllm/templates/service.yaml new file mode 100644 index 000000000..42e4fab70 --- /dev/null +++ b/helm-charts/vllm/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "vllm.fullname" . }} + labels: + {{- include "vllm.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: {{ .Values.port }} + protocol: TCP + name: vllm + selector: + {{- include "vllm.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/vllm/templates/tests/test-pod.yaml b/helm-charts/vllm/templates/tests/test-pod.yaml new file mode 100644 index 000000000..6a712e3ed --- /dev/null +++ b/helm-charts/vllm/templates/tests/test-pod.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "vllm.fullname" . }}-testpod" + labels: + {{- include "vllm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "vllm.fullname" . }}/v1/completions -sS --fail-with-body \ + -H "Content-Type: application/json" \ + -d '{"model": {{ .Values.LLM_MODEL_ID | quote }},"prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/vllm/values.yaml b/helm-charts/vllm/values.yaml new file mode 100644 index 000000000..fb599ccd3 --- /dev/null +++ b/helm-charts/vllm/values.yaml @@ -0,0 +1,100 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for vllm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +port: 2080 +shmSize: 1Gi +image: + repository: opea/vllm + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} +# readOnlyRootFilesystem: true +# allowPrivilegeEscalation: false +# runAsNonRoot: true +# runAsUser: 1000 +# capabilities: +# drop: +# - ALL +# seccompProfile: +# type: RuntimeDefault + +service: + type: ClusterIP + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +extraCmdArgs: ["--enforce-eager", "--dtype", "auto"] + +livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 +VLLM_CPU_KVCACHE_SPACE: "" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following variables; the behavior is undefined if both are set. + # By default, both variables are empty, and the model will be downloaded and saved to a tmp volume. + modelUseHostPath: "" + modelUsePVC: "" diff --git a/helm-charts/web-retriever/.helmignore b/helm-charts/web-retriever/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/web-retriever/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/web-retriever/Chart.yaml b/helm-charts/web-retriever/Chart.yaml new file mode 100644 index 000000000..5153f480b --- /dev/null +++ b/helm-charts/web-retriever/Chart.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: web-retriever +description: The Helm chart for deploying web retriever as microservice +type: application +version: 0-latest +# The web retriever microservice server version +appVersion: "v1.0" +dependencies: + - name: tei + version: 0-latest + repository: file://../tei + condition: tei.enabled diff --git a/helm-charts/web-retriever/README.md b/helm-charts/web-retriever/README.md new file mode 100644 index 000000000..c3aee3a5b --- /dev/null +++ b/helm-charts/web-retriever/README.md @@ -0,0 +1,58 @@ +# web-retriever + +Helm chart for deploying Web Retriever microservice. + +Web retriever depends on tei; you should set the TEI_EMBEDDING_ENDPOINT endpoint before starting. + +## (Option1): Installing the chart separately + +First, you need to install the tei chart, please refer to the [tei](../tei) chart for more information. + +After you've deployed the tei chart successfully, please run `kubectl get svc` to get the tei service endpoint, e.g. `http://tei`. + +To install the web-retriever chart, run the following: + +```console +cd GenAIInfra/helm-charts/web-retriever +helm dependency update +export TEI_EMBEDDING_ENDPOINT="http://tei" +export GOOGLE_API_KEY="yourownkey" +export GOOGLE_CSE_ID="yourownid" +helm install web-retriever .
--set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set GOOGLE_API_KEY=${GOOGLE_API_KEY} --set GOOGLE_CSE_ID=${GOOGLE_CSE_ID} +``` + +## (Option2): Installing the chart with dependencies automatically + +```console +cd GenAIInfra/helm-charts/web-retriever +helm dependency update +export GOOGLE_API_KEY="yourownkey" +export GOOGLE_CSE_ID="yourownid" +helm install web-retriever . --set tei.enabled=true --set GOOGLE_API_KEY=${GOOGLE_API_KEY} --set GOOGLE_CSE_ID=${GOOGLE_CSE_ID} +``` + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Then run the command `kubectl port-forward svc/web-retriever 7077:7077` to expose the web-retriever service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") +curl http://localhost:7077/v1/web_retrieval \ + -X POST \ + -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------------- | ------ | ----------------------------- | ----------- | +| image.repository | string | `"opea/web-retriever-chroma"` | | +| service.port | string | `"7077"` | | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | +| GOOGLE_API_KEY | string | `""` | | +| GOOGLE_CSE_ID | string | `""` | | diff --git a/helm-charts/web-retriever/ci-values.yaml b/helm-charts/web-retriever/ci-values.yaml new file mode 100644 index 000000000..f0940ef43 --- /dev/null +++ b/helm-charts/web-retriever/ci-values.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for web-retriever. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates.
+ +tei: + enabled: true diff --git a/helm-charts/web-retriever/templates/_helpers.tpl b/helm-charts/web-retriever/templates/_helpers.tpl new file mode 100644 index 000000000..1bdb3e1b2 --- /dev/null +++ b/helm-charts/web-retriever/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "web-retriever.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "web-retriever.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "web-retriever.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "web-retriever.labels" -}} +helm.sh/chart: {{ include "web-retriever.chart" . }} +{{ include "web-retriever.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "web-retriever.selectorLabels" -}} +app.kubernetes.io/name: {{ include "web-retriever.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "web-retriever.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "web-retriever.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/web-retriever/templates/configmap.yaml b/helm-charts/web-retriever/templates/configmap.yaml new file mode 100644 index 000000000..a3ac0f9ac --- /dev/null +++ b/helm-charts/web-retriever/templates/configmap.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "web-retriever.fullname" . }}-config + labels: + {{- include "web-retriever.labels" . | nindent 4 }} +data: + {{- if .Values.TEI_EMBEDDING_ENDPOINT }} + TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}} + {{- else }} + TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei" + {{- end }} + GOOGLE_API_KEY: {{ .Values.GOOGLE_API_KEY | quote }} + GOOGLE_CSE_ID: {{ .Values.GOOGLE_CSE_ID | quote }} + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + {{- if and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} + no_proxy: "{{ .Release.Name }}-tei,{{ .Values.global.no_proxy }}" + {{- else }} + no_proxy: {{ .Values.global.no_proxy | quote }} + {{- end }} + HF_HOME: "/tmp/.cache/huggingface" + LOGFLAG: {{ .Values.LOGFLAG | quote }} diff --git a/helm-charts/web-retriever/templates/deployment.yaml b/helm-charts/web-retriever/templates/deployment.yaml new file mode 100644 index 000000000..307206d3b --- /dev/null +++ b/helm-charts/web-retriever/templates/deployment.yaml @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "web-retriever.fullname" . }} + labels: + {{- include "web-retriever.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "web-retriever.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "web-retriever.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "web-retriever.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: web-retriever + containerPort: 7077 + protocol: TCP + volumeMounts: + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "web-retriever.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/web-retriever/templates/service.yaml b/helm-charts/web-retriever/templates/service.yaml new file mode 100644 index 000000000..93b0b8da7 --- /dev/null +++ b/helm-charts/web-retriever/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "web-retriever.fullname" . }} + labels: + {{- include "web-retriever.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 7077 + protocol: TCP + name: web-retriever + selector: + {{- include "web-retriever.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/web-retriever/templates/tests/test-pod.yaml b/helm-charts/web-retriever/templates/tests/test-pod.yaml new file mode 100644 index 000000000..a1c0a3aaa --- /dev/null +++ b/helm-charts/web-retriever/templates/tests/test-pod.yaml @@ -0,0 +1,31 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "web-retriever.fullname" . }}-testpod" + labels: + {{- include "web-retriever.labels" . 
| nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)"); + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "web-retriever.fullname" . }}:{{ .Values.service.port }}/v1/web_retrieval \ + -X POST \ + -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' && break; + curlcode=$? + if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/web-retriever/values.yaml b/helm-charts/web-retriever/values.yaml new file mode 100644 index 000000000..e3d254476 --- /dev/null +++ b/helm-charts/web-retriever/values.yaml @@ -0,0 +1,95 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for web-retriever. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: false + +replicaCount: 1 + +# Set it as a non-null string, such as true, if you want to enable logging facility, +# otherwise, keep it as "" to disable it. +LOGFLAG: "" + +TEI_EMBEDDING_ENDPOINT: "" +GOOGLE_API_KEY: "" +GOOGLE_CSE_ID: "" + +image: + repository: opea/web-retriever-chroma + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for web-retriever service is 7077 + port: 7077 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: /v1/health_check + port: web-retriever + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /v1/health_check + port: web-retriever + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: /v1/health_check + port: web-retriever + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" diff --git a/helm-charts/whisper/.helmignore b/helm-charts/whisper/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm-charts/whisper/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm-charts/whisper/Chart.yaml b/helm-charts/whisper/Chart.yaml new file mode 100644 index 000000000..c03ad7a0c --- /dev/null +++ b/helm-charts/whisper/Chart.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: whisper +description: The Helm chart for deploying whisper as microservice +type: application +version: 0-latest +# The whisper microservice server version +appVersion: "v1.0" diff --git a/helm-charts/whisper/README.md b/helm-charts/whisper/README.md new file mode 100644 index 000000000..6c1215515 --- /dev/null +++ b/helm-charts/whisper/README.md @@ -0,0 +1,32 @@ +# whisper + +Helm chart for deploying whisper service. + +## Installing the Chart + +To install the chart, run the following: + +```console +export MODELDIR=/mnt/opea-models +export ASR_MODEL_PATH="openai/whisper-small" +helm install whisper whisper --set global.modelUseHostPath=${MODELDIR} --set ASR_MODEL_PATH=${ASR_MODEL_PATH} +``` + +## Verify + +Use port-forward to access it from localhost. 
+ +```console +kubectl port-forward service/whisper 1234:7066 & +curl http://localhost:1234/v1/asr \ + -XPOST \ + -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ---------------- | ------ | ---------------- | ----------- | +| image.repository | string | `"opea/whisper"` | | +| service.port | string | `"7066"` | | diff --git a/helm-charts/whisper/ci-values.yaml b/helm-charts/whisper/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/whisper/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/whisper/gaudi-values.yaml b/helm-charts/whisper/gaudi-values.yaml new file mode 100644 index 000000000..fec919ad3 --- /dev/null +++ b/helm-charts/whisper/gaudi-values.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for whisper. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +image: + repository: opea/whisper-gaudi + tag: "latest" + +resources: + limits: + habana.ai/gaudi: 1 diff --git a/helm-charts/whisper/templates/_helpers.tpl b/helm-charts/whisper/templates/_helpers.tpl new file mode 100644 index 000000000..264e205a4 --- /dev/null +++ b/helm-charts/whisper/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "whisper.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "whisper.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "whisper.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "whisper.labels" -}} +helm.sh/chart: {{ include "whisper.chart" . }} +{{ include "whisper.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "whisper.selectorLabels" -}} +app.kubernetes.io/name: {{ include "whisper.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "whisper.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "whisper.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/whisper/templates/configmap.yaml b/helm-charts/whisper/templates/configmap.yaml new file mode 100644 index 000000000..2e27dea5d --- /dev/null +++ b/helm-charts/whisper/templates/configmap.yaml @@ -0,0 +1,20 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "whisper.fullname" . }}-config + labels: + {{- include "whisper.labels" . 
| nindent 4 }} +data: + EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" + ASR_MODEL_PATH: {{ .Values.ASR_MODEL_PATH | quote }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + HUGGINGFACE_HUB_CACHE: "/data" diff --git a/helm-charts/whisper/templates/deployment.yaml b/helm-charts/whisper/templates/deployment.yaml new file mode 100644 index 000000000..c5d79899c --- /dev/null +++ b/helm-charts/whisper/templates/deployment.yaml @@ -0,0 +1,107 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "whisper.fullname" . }} + labels: + {{- include "whisper.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "whisper.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "whisper.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + envFrom: + - configMapRef: + name: {{ include "whisper.fullname" . 
}}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- if .Values.global.modelUseHostPath }} + {} + {{- else }} + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + ports: + - name: whisper + containerPort: 7066 + protocol: TCP + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "whisper.selectorLabels" . 
| nindent 14 }} + {{- end }} diff --git a/helm-charts/whisper/templates/service.yaml b/helm-charts/whisper/templates/service.yaml new file mode 100644 index 000000000..42852290f --- /dev/null +++ b/helm-charts/whisper/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "whisper.fullname" . }} + labels: + {{- include "whisper.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: 7066 + protocol: TCP + name: whisper + selector: + {{- include "whisper.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/whisper/templates/tests/test-pod.yaml b/helm-charts/whisper/templates/tests/test-pod.yaml new file mode 100644 index 000000000..9d6b1de51 --- /dev/null +++ b/helm-charts/whisper/templates/tests/test-pod.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "whisper.fullname" . }}-testpod" + labels: + {{- include "whisper.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl -sS --fail-with-body http://{{ include "whisper.fullname" . }}:{{ .Values.service.port }}/v1/asr \ + -X POST \ + -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/whisper/values.yaml b/helm-charts/whisper/values.yaml new file mode 100644 index 000000000..c368fe669 --- /dev/null +++ b/helm-charts/whisper/values.yaml @@ -0,0 +1,96 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for whisper. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +ASR_MODEL_PATH: "openai/whisper-small" + +image: + repository: opea/whisper + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +service: + type: ClusterIP + # The default port for whisper service is 7066 + port: 7066 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +livenessProbe: + httpGet: + path: /v1/health + port: whisper + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 +readinessProbe: + httpGet: + path: /v1/health + port: whisper + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: /v1/health + port: whisper + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for local directory, this is good for one node test. Example: + # modelUseHostPath: /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example: + # modelUsePVC: model-volume + # You can only set one of the following variables; the behavior is undefined if both are set. + # By default, both variables are empty, and the model will be downloaded and saved to a tmp volume. + modelUseHostPath: "" + modelUsePVC: ""