diff --git a/helm-charts/agent/.helmignore b/helm-charts/agent/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/agent/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/agent/Chart.yaml b/helm-charts/agent/Chart.yaml
new file mode 100644
index 000000000..5b261b98a
--- /dev/null
+++ b/helm-charts/agent/Chart.yaml
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: agent
+description: The Helm chart for deploying agent microservice
+type: application
+version: 0-latest
+# The agent microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tgi
+    version: 0-latest
+    repository: file://../tgi
+    condition: tgi.enabled
+  - name: vllm
+    version: 0-latest
+    repository: file://../vllm
+    condition: vllm.enabled
diff --git a/helm-charts/agent/README.md b/helm-charts/agent/README.md
new file mode 100644
index 000000000..820de8951
--- /dev/null
+++ b/helm-charts/agent/README.md
@@ -0,0 +1,46 @@
+# agent
+
+Helm chart for deploying Agent microservice.
+
+The agent depends on an LLM service: either set `llm_endpoint_url` to an existing LLM endpoint, or enable one of the bundled LLM subcharts as shown below.
+
+## Deploy
+
+### Use external LLM endpoint
+
+helm install agent oci://ghcr.io/opea-project/charts/agent --set llm_endpoint_url=${YOUR_LLM_ENDPOINT}
+
+### Deploy with tgi
+
+helm install agent oci://ghcr.io/opea-project/charts/agent --set tgi.enabled=True
+
+### Deploy with vllm
+
+helm install agent oci://ghcr.io/opea-project/charts/agent --set vllm.enabled=True
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/agent 9090:9090` to expose the agent service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9090/v1/chat/completions \
+    -X POST \
+    -H 'Content-Type: application/json' \
+    -d '{"query":"What is OPEA?"}'
+```
+
+## Options
+
+For global options, see Global Options.
+
+| Key                             | Type   | Default                                | Description                     |
+| ------------------------------- | ------ | -------------------------------------- | ------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `"insert-your-huggingface-token-here"` | Your own Hugging Face API token |
+| image.repository                | string | `"opea/agent-langchain"`               |                                 |
+| service.port                    | int    | `9090`                                 |                                 |
+| llm_endpoint_url                | string | `""`                                   | LLM endpoint                    |
+| global.monitoring               | bool   | `false`                                | Service usage metrics           |
diff --git a/helm-charts/agent/ci-gaudi-values.yaml b/helm-charts/agent/ci-gaudi-values.yaml
new file mode 120000
index 000000000..7243d31b2
--- /dev/null
+++ b/helm-charts/agent/ci-gaudi-values.yaml
@@ -0,0 +1 @@
+gaudi-values.yaml
\ No newline at end of file
diff --git a/helm-charts/agent/gaudi-values.yaml b/helm-charts/agent/gaudi-values.yaml
new file mode 100644
index 000000000..91ef5d102
--- /dev/null
+++ b/helm-charts/agent/gaudi-values.yaml
@@ -0,0 +1,38 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Accelerate inferencing in heaviest components to improve performance
+# by overriding their subchart values
+
+tgi:
+  enabled: true
+  accelDevice: "gaudi"
+  image:
+    repository: ghcr.io/huggingface/tgi-gaudi
+    tag: "2.0.6"
+  resources:
+    limits:
+      habana.ai/gaudi: 4
+  MAX_INPUT_LENGTH: "4096"
+  MAX_TOTAL_TOKENS: "8192"
+  CUDA_GRAPHS: ""
+  OMPI_MCA_btl_vader_single_copy_mechanism: "none"
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: "true"
+  ENABLE_HPU_GRAPH: "true"
+  LIMIT_HPU_GRAPH: "true"
+  USE_FLASH_ATTENTION: "true"
+  FLASH_ATTENTION_RECOMPUTE: "true"
+  extraCmdArgs: ["--sharded","true","--num-shard","4"]
+  livenessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  readinessProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+  startupProbe:
+    initialDelaySeconds: 5
+    periodSeconds: 5
+    timeoutSeconds: 1
+    failureThreshold: 120
diff --git a/helm-charts/agent/templates/_helpers.tpl b/helm-charts/agent/templates/_helpers.tpl
new file mode 100644
index 000000000..1227dfd81
--- /dev/null
+++ b/helm-charts/agent/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "agent.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "agent.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "agent.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "agent.labels" -}}
+helm.sh/chart: {{ include "agent.chart" . }}
+{{ include "agent.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "agent.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "agent.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "agent.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "agent.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/agent/templates/configmap.yaml b/helm-charts/agent/templates/configmap.yaml
new file mode 100644
index 000000000..62970d7d2
--- /dev/null
+++ b/helm-charts/agent/templates/configmap.yaml
@@ -0,0 +1,72 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Environment for the agent container; the Deployment loads every key below via envFrom.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "agent.fullname" . }}-config
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+data:
+  {{- if .Values.tools }}
+  tools: {{ .Values.tools | quote }}
+  {{- end }}
+  {{- /*
+  LLM endpoint resolution: an explicit llm_endpoint_url wins; otherwise point at the
+  in-release subchart service. NOTE(review): the vllm service name is assumed to follow
+  the same <release>-<chart> pattern as tgi; confirm against the vllm chart.
+  */}}
+  {{- if .Values.llm_endpoint_url }}
+  llm_endpoint_url: {{ .Values.llm_endpoint_url | quote }}
+  {{- else if .Values.vllm.enabled }}
+  llm_endpoint_url: "http://{{ .Release.Name }}-vllm"
+  {{- else }}
+  llm_endpoint_url: "http://{{ .Release.Name }}-tgi"
+  {{- end }}
+  {{- if .Values.model }}
+  model: {{ .Values.model | quote }}
+  {{- end }}
+  {{- if .Values.streaming }}
+  streaming: {{ .Values.streaming | quote }}
+  {{- end }}
+  {{- if .Values.temperature }}
+  temperature: {{ .Values.temperature | quote }}
+  {{- end }}
+  {{- if .Values.RETRIEVAL_TOOL_URL }}
+  RETRIEVAL_TOOL_URL: {{ .Values.RETRIEVAL_TOOL_URL | quote }}
+  {{- else }}
+  RETRIEVAL_TOOL_URL: "http://{{ .Release.Name }}-docretriever:8889/v1/retrievaltool"
+  {{- end }}
+  {{- if .Values.CRAG_SERVER }}
+  CRAG_SERVER: {{ .Values.CRAG_SERVER | quote }}
+  {{- else }}
+  CRAG_SERVER: "http://{{ .Release.Name }}-crag:8080"
+  {{- end }}
+  {{- if .Values.WORKER_AGENT_URL }}
+  WORKER_AGENT_URL: {{ .Values.WORKER_AGENT_URL | quote }}
+  {{- else }}
+  WORKER_AGENT_URL: "http://{{ .Release.Name }}-worker:9095/v1/chat/completions"
+  {{- end }}
+  require_human_feedback: {{ .Values.require_human_feedback | quote }}
+  recursion_limit: {{ .Values.recursion_limit | quote }}
+  llm_engine: {{ .Values.llm_engine | quote }}
+  strategy: {{ .Values.strategy | quote }}
+  max_new_tokens: {{ .Values.max_new_tokens | quote }}
+  {{- if .Values.OPENAI_API_KEY }}
+  OPENAI_API_KEY: {{ .Values.OPENAI_API_KEY | quote }}
+  {{- end }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- /* Bypass the proxy for the in-release LLM services only when no external endpoint is configured. */}}
+  {{- if and (not .Values.llm_endpoint_url) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/agent/templates/deployment.yaml b/helm-charts/agent/templates/deployment.yaml
new file mode 100644
index 000000000..12c0e00a2
--- /dev/null
+++ b/helm-charts/agent/templates/deployment.yaml
@@ -0,0 +1,100 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "agent.fullname" . }}
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "agent.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "agent.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "agent.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: agent
+              containerPort: 9090
+              protocol: TCP
+          volumeMounts:
+            {{- if .Values.toolPath }}
+            - mountPath: /home/user/tools
+              name: tool
+            {{- end }}
+            - mountPath: /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        {{- if .Values.toolPath }}
+        - name: tool
+          hostPath:
+            path: {{ .Values.toolPath }}
+            type: Directory
+        {{- end }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "agent.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/agent/templates/service.yaml b/helm-charts/agent/templates/service.yaml
new file mode 100644
index 000000000..00d34de8d
--- /dev/null
+++ b/helm-charts/agent/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "agent.fullname" . }}
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 9090
+      protocol: TCP
+      name: agent
+  selector:
+    {{- include "agent.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/agent/templates/servicemonitor.yaml b/helm-charts/agent/templates/servicemonitor.yaml
new file mode 100644
index 000000000..0eaae8763
--- /dev/null
+++ b/helm-charts/agent/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "agent.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "agent.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: agent
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/agent/templates/tests/test-pod.yaml b/helm-charts/agent/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..61515c7a3
--- /dev/null
+++ b/helm-charts/agent/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "agent.fullname" . }}-testpod"
+  labels:
+    {{- include "agent.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+          curl http://{{ include "agent.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
+            -X POST \
+            -H 'Content-Type: application/json' \
+            -d '{"query":"What is OPEA?"}' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/agent/values.yaml b/helm-charts/agent/values.yaml
new file mode 100644
index 000000000..9d7b236d1
--- /dev/null
+++ b/helm-charts/agent/values.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for agent.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: false
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MAX_INPUT_LENGTH: "4096"
+  MAX_TOTAL_TOKENS: "8192"
+
+vllm:
+  enabled: false
+  LLM_MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3"
+  extraCmdArgs: ["/bin/bash", "-c", "python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model mistralai/Mistral-7B-Instruct-v0.3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 4096 --max-seq_len-to-capture 8192 --enable-auto-tool-choice --tool-call-parser mistral"]
+
+replicaCount: 1  # rendered as the Deployment's spec.replicas
+llm_endpoint_url: ""  # external LLM endpoint; when empty, the ConfigMap template substitutes an in-release service URL
+model: "meta-llama/Meta-Llama-3.1-70B-Instruct"  # written to the agent ConfigMap only when non-empty
+max_new_tokens: "4096"  # this and the keys below are always written, quoted, to the agent ConfigMap
+llm_engine: "tgi"
+strategy: "react_langchain"
+recursion_limit: "15"
+require_human_feedback: "false"
+
+# Set it to a non-empty string, such as "true", to enable the logging facility;
+# otherwise, keep it as "" to disable it.
+LOGFLAG: "True"
+
+image:
+  repository: opea/agent-langchain
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for agent service is 9090
+  port: 9090
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: agent
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: agent
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: agent
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/asr/.helmignore b/helm-charts/asr/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/asr/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/asr/Chart.yaml b/helm-charts/asr/Chart.yaml
new file mode 100644
index 000000000..90253b164
--- /dev/null
+++ b/helm-charts/asr/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: asr
+description: The Helm chart for deploying asr as microservice
+type: application
+version: 0-latest
+# The asr microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: whisper
+    version: 0-latest
+    repository: file://../whisper
+    condition: whisper.enabled
diff --git a/helm-charts/asr/README.md b/helm-charts/asr/README.md
new file mode 100644
index 000000000..a0c131936
--- /dev/null
+++ b/helm-charts/asr/README.md
@@ -0,0 +1,51 @@
+# asr
+
+Helm chart for deploying asr microservice.
+
+asr depends on the whisper service: either set `ASR_ENDPOINT` to an existing whisper endpoint before installing, or let this chart deploy whisper as a dependency (Option 2).
+
+## (Option1): Installing the chart separately
+
+First, you need to install the whisper chart, please refer to the [whisper](../whisper/README.md) chart for more information.
+
+After you've deployed the whisper chart successfully, run `kubectl get svc` to get the whisper service endpoint, e.g. `http://whisper:7066`.
+
+To install the asr chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/asr
+export ASR_ENDPOINT="http://whisper:7066"
+helm dependency update
+helm install asr . --set ASR_ENDPOINT=${ASR_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/asr
+helm dependency update
+helm install asr . --set whisper.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/asr 9099:9099` to expose the asr service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9099/v1/audio/transcriptions \
+  -XPOST \
+  -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+  -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key              | Type   | Default      | Description |
+| ---------------- | ------ | ------------ | ----------- |
+| image.repository | string | `"opea/asr"` |             |
+| service.port     | int    | `9099`       |             |
+| ASR_ENDPOINT     | string | `""`         |             |
diff --git a/helm-charts/asr/ci-values.yaml b/helm-charts/asr/ci-values.yaml
new file mode 100644
index 000000000..b0f302d8b
--- /dev/null
+++ b/helm-charts/asr/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for asr.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+whisper:
+  enabled: true
diff --git a/helm-charts/asr/templates/_helpers.tpl b/helm-charts/asr/templates/_helpers.tpl
new file mode 100644
index 000000000..62d5a0554
--- /dev/null
+++ b/helm-charts/asr/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "asr.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "asr.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "asr.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "asr.labels" -}}
+helm.sh/chart: {{ include "asr.chart" . }}
+{{ include "asr.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "asr.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "asr.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "asr.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "asr.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/asr/templates/configmap.yaml b/helm-charts/asr/templates/configmap.yaml
new file mode 100644
index 000000000..965e98fc2
--- /dev/null
+++ b/helm-charts/asr/templates/configmap.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "asr.fullname" . }}-config
+  labels:
+    {{- include "asr.labels" . | nindent 4 }}
+data:
+  {{- if .Values.ASR_ENDPOINT }}
+  ASR_ENDPOINT: {{ .Values.ASR_ENDPOINT | quote}}
+  {{- else }}
+  ASR_ENDPOINT: "http://{{ .Release.Name }}-whisper:7066"
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.ASR_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-whisper,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/asr/templates/deployment.yaml b/helm-charts/asr/templates/deployment.yaml
new file mode 100644
index 000000000..309ff56ca
--- /dev/null
+++ b/helm-charts/asr/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "asr.fullname" . }}
+  labels:
+    {{- include "asr.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "asr.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "asr.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "asr.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: asr
+              containerPort: 9099
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "asr.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/asr/templates/service.yaml b/helm-charts/asr/templates/service.yaml
new file mode 100644
index 000000000..dfa3e5b41
--- /dev/null
+++ b/helm-charts/asr/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "asr.fullname" . }}
+  labels:
+    {{- include "asr.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 9099
+      protocol: TCP
+      name: asr
+  selector:
+    {{- include "asr.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/asr/templates/tests/test-pod.yaml b/helm-charts/asr/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..d71fbf9f7
--- /dev/null
+++ b/helm-charts/asr/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "asr.fullname" . }}-testpod"
+  labels:
+    {{- include "asr.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "asr.fullname" . }}:{{ .Values.service.port }}/v1/audio/transcriptions \
+            -XPOST \
+            -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/asr/values.yaml b/helm-charts/asr/values.yaml
new file mode 100644
index 000000000..92728e644
--- /dev/null
+++ b/helm-charts/asr/values.yaml
@@ -0,0 +1,93 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for asr.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+whisper:
+  enabled: false
+
+replicaCount: 1
+
+# Set it to a non-empty string, such as "true", to enable the logging facility;
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+ASR_ENDPOINT: ""
+
+image:
+  repository: opea/asr
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for asr service is 9099
+  port: 9099
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: asr
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: asr
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: asr
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/chathistory-usvc/.helmignore b/helm-charts/chathistory-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/chathistory-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/chathistory-usvc/Chart.yaml b/helm-charts/chathistory-usvc/Chart.yaml
new file mode 100644
index 000000000..f7a4cf8ba
--- /dev/null
+++ b/helm-charts/chathistory-usvc/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: chathistory-usvc
+description: The Helm chart for deploying chat history as microservice
+type: application
+version: 0-latest
+# Version of the chat history microservice; the deployment template uses it as the image tag when image.tag is unset
+appVersion: "v1.0"
+dependencies:
+  - name: mongodb
+    version: 0-latest
+    repository: file://../mongodb
+    condition: mongodb.enabled
diff --git a/helm-charts/chathistory-usvc/README.md b/helm-charts/chathistory-usvc/README.md
new file mode 100644
index 000000000..171e3b93c
--- /dev/null
+++ b/helm-charts/chathistory-usvc/README.md
@@ -0,0 +1,54 @@
+# chathistory-usvc
+
+Helm chart for deploying chathistory-usvc microservice.
+
+chathistory-usvc depends on the mongodb service; please specify its endpoint.
+
+## (Option1): Installing the chart separately
+
+First, install the mongodb chart; refer to the [mongodb](../mongodb) chart for more information.
+
+After you've deployed the mongodb chart successfully, run `kubectl get svc` to get the service host and port, e.g. `mongodb:27017`.
+
+To install chathistory-usvc chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/chathistory-usvc
+export MONGO_HOST="mongodb"
+export MONGO_PORT="27017"
+helm dependency update
+helm install chathistory-usvc . --set MONGO_HOST=${MONGO_HOST} --set MONGO_PORT=${MONGO_PORT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/chathistory-usvc
+helm dependency update
+helm install chathistory-usvc . --set mongodb.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/chathistory-usvc 6012:6012` to expose the chathistory-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl -X 'POST' \
+  http://localhost:6012/v1/chathistory/create \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{"data": {"messages": "test Messages", "user": "test"}}'
+```
+
+## Values
+
+| Key              | Type   | Default                           | Description |
+| ---------------- | ------ | --------------------------------- | ----------- |
+| image.repository | string | `"opea/chathistory-mongo-server"` |             |
+| service.port     | string | `"6012"`                          |             |
+| MONGO_HOST       | string | `""`                              |             |
+| MONGO_PORT       | string | `""`                              |             |
diff --git a/helm-charts/chathistory-usvc/ci-values.yaml b/helm-charts/chathistory-usvc/ci-values.yaml
new file mode 100644
index 000000000..b2ce309f3
--- /dev/null
+++ b/helm-charts/chathistory-usvc/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI value overrides for chathistory-usvc.
+# This is a YAML-formatted file.
+# Enables the mongodb dependency declared in Chart.yaml (condition: mongodb.enabled).
+
+mongodb:
+  enabled: true
diff --git a/helm-charts/chathistory-usvc/templates/_helpers.tpl b/helm-charts/chathistory-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..6442c29ee
--- /dev/null
+++ b/helm-charts/chathistory-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name (or nameOverride when set), truncated to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "chathistory-usvc.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name: fullnameOverride when set, otherwise "<release>-<name>".
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If the release name already contains the chart name, the release name alone is used as the full name.
+*/}}
+{{- define "chathistory-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined as "<name>-<version>" for the helm.sh/chart label ("+" is replaced by "_").
+*/}}
+{{- define "chathistory-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (when set) and the managing service.
+*/}}
+{{- define "chathistory-usvc.labels" -}}
+helm.sh/chart: {{ include "chathistory-usvc.chart" . }}
+{{ include "chathistory-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels, used by the Deployment selector, the pod template labels and the Service selector.
+*/}}
+{{- define "chathistory-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chathistory-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: serviceAccount.name, defaulting to the fullname when serviceAccount.create is set and to "default" otherwise. NOTE(review): this chart's values.yaml defines no serviceAccount key and no template includes this helper - confirm before using it.
+*/}}
+{{- define "chathistory-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "chathistory-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/chathistory-usvc/templates/configmap.yaml b/helm-charts/chathistory-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..14149d592
--- /dev/null
+++ b/helm-charts/chathistory-usvc/templates/configmap.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ printf "%s-config" (include "chathistory-usvc.fullname" .) }}
+  labels:
+    {{- include "chathistory-usvc.labels" . | nindent 4 }}
+data:
+  {{- with .Values.MONGO_HOST }}
+  MONGO_HOST: {{ quote . }}
+  {{- else }}{{/* no host configured: default to <release>-mongodb */}}
+  MONGO_HOST: {{ printf "%s-mongodb" .Release.Name | quote }}
+  {{- end }}
+  {{- with .Values.MONGO_PORT }}
+  MONGO_PORT: {{ quote . }}
+  {{- else }}
+  MONGO_PORT: "27017"
+  {{- end }}
+  DB_NAME: {{ quote .Values.DB_NAME }}
+  COLLECTION_NAME: {{ quote .Values.COLLECTION_NAME }}
+  http_proxy: {{ quote .Values.global.http_proxy }}
+  https_proxy: {{ quote .Values.global.https_proxy }}
+  {{- if and (not .Values.MONGO_HOST) (or .Values.global.http_proxy .Values.global.https_proxy) }}{{/* keep the default mongodb host off the proxy */}}
+  no_proxy: "{{ .Release.Name }}-mongodb,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ quote .Values.global.no_proxy }}
+  {{- end }}
+  LOGFLAG: {{ quote .Values.LOGFLAG }}
diff --git a/helm-charts/chathistory-usvc/templates/deployment.yaml b/helm-charts/chathistory-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..b721318e6
--- /dev/null
+++ b/helm-charts/chathistory-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "chathistory-usvc.fullname" . }}
+  labels:
+    {{- include "chathistory-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "chathistory-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "chathistory-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "chathistory-usvc.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          ports:
+            - name: port
+              protocol: TCP
+              containerPort: 6012
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - name: tmp
+              mountPath: /tmp
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "chathistory-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/chathistory-usvc/templates/service.yaml b/helm-charts/chathistory-usvc/templates/service.yaml
new file mode 100644
index 000000000..d6cae5a99
--- /dev/null
+++ b/helm-charts/chathistory-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "chathistory-usvc.fullname" . }}
+  labels:
+    {{- include "chathistory-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "chathistory-usvc.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: port
+      protocol: TCP
+      port: {{ .Values.service.port }}
+      targetPort: 6012
diff --git a/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml b/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..c0193e01a
--- /dev/null
+++ b/helm-charts/chathistory-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "chathistory-usvc.fullname" . }}-testpod
+  labels:
+    {{- include "chathistory-usvc.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    # "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"  # uncomment to delete the test pod when the hook finishes
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          set -x
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -X 'POST' \
+              http://{{ include "chathistory-usvc.fullname" . }}:{{ .Values.service.port }}/v1/chathistory/create -sS --fail-with-body \
+              -H 'accept: application/json' \
+              -H 'Content-Type: application/json' \
+              -d '{"data": {"messages": "test Messages", "user": "test"}}' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/chathistory-usvc/values.yaml b/helm-charts/chathistory-usvc/values.yaml
new file mode 100644
index 000000000..d5f2faa92
--- /dev/null
+++ b/helm-charts/chathistory-usvc/values.yaml
@@ -0,0 +1,96 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for chathistory-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+mongodb:
+  enabled: false
+
+replicaCount: 1
+
+image:
+  repository: opea/chathistory-mongo-server
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  port: 6012
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: /v1/health_check  # absolute path, as in the Kubernetes probe examples
+    port: port  # named container port declared in templates/deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 consecutive failures at a 5s period (~2 minutes)
+readinessProbe:
+  httpGet:
+    path: /v1/health_check
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /v1/health_check
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures at a 5s period allows ~10 minutes for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+# MongoDB connection info; when MONGO_HOST / MONGO_PORT are empty the configmap template falls back to "<release>-mongodb" and "27017"
+MONGO_HOST: ""
+MONGO_PORT: ""
+DB_NAME: "OPEA"
+COLLECTION_NAME: "ChatHistory"
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/data-prep/.helmignore b/helm-charts/data-prep/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/data-prep/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/data-prep/Chart.yaml b/helm-charts/data-prep/Chart.yaml
new file mode 100644
index 000000000..80f5809c1
--- /dev/null
+++ b/helm-charts/data-prep/Chart.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: data-prep
+description: The Helm chart for deploying data prep as microservice
+type: application
+version: 0-latest
+# Version of the data prep microservice; the deployment template uses it as the image tag when image.tag is unset
+appVersion: "v1.0"
+dependencies:
+  - name: tei
+    version: 0-latest
+    repository: file://../tei
+    condition: tei.enabled
+  - name: redis-vector-db
+    version: 0-latest
+    repository: file://../redis-vector-db
+    condition: redis-vector-db.enabled
+  - name: milvus  # the only dependency fetched from a remote repository instead of a sibling chart
+    version: 4.2.12
+    repository: https://zilliztech.github.io/milvus-helm/
+    condition: milvus.enabled
diff --git a/helm-charts/data-prep/README.md b/helm-charts/data-prep/README.md
new file mode 100644
index 000000000..4a05e2f34
--- /dev/null
+++ b/helm-charts/data-prep/README.md
@@ -0,0 +1,58 @@
+# data-prep
+
+Helm chart for deploying data-prep microservice.
+
+data-prep depends on the redis-vector-db and tei services; please specify their endpoints.
+
+## (Option1): Installing the chart separately
+
+First, install the tei and redis-vector-db charts; refer to [tei](../tei/README.md) and [redis-vector-db](../redis-vector-db/README.md) for more information.
+
+After you've deployed the tei and redis-vector-db charts successfully, run `kubectl get svc` to get the service endpoint and URL respectively, e.g. `http://tei` and `redis://redis-vector-db:6379`.
+
+To install data-prep chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/data-prep
+export REDIS_URL="redis://redis-vector-db:6379"
+export TEI_EMBEDDING_ENDPOINT="http://tei"
+helm dependency update
+helm install data-prep . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/data-prep
+helm dependency update
+helm install data-prep . --set redis-vector-db.enabled=true --set tei.enabled=true
+
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/data-prep 6007:6007` to expose the data-prep service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:6007/v1/dataprep  \
+    -X POST \
+    -H "Content-Type: multipart/form-data" \
+    -F "files=@./README.md"
+```
+
+## Values
+
+| Key                    | Type   | Default                 | Description |
+| ---------------------- | ------ | ----------------------- | ----------- |
+| image.repository       | string | `"opea/dataprep-redis"` |             |
+| service.port           | string | `"6007"`                |             |
+| REDIS_URL              | string | `""`                    |             |
+| TEI_EMBEDDING_ENDPOINT | string | `""`                    |             |
+
+## Milvus support
+
+Refer to the milvus-values.yaml for milvus configurations.
diff --git a/helm-charts/data-prep/ci-values.yaml b/helm-charts/data-prep/ci-values.yaml
new file mode 100644
index 000000000..473698ec0
--- /dev/null
+++ b/helm-charts/data-prep/ci-values.yaml
@@ -0,0 +1,13 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI value overrides for data-prep.
+# This is a YAML-formatted file.
+# Enables the tei and redis-vector-db dependencies declared in Chart.yaml and leaves milvus disabled.
+
+tei:
+  enabled: true
+redis-vector-db:
+  enabled: true
+milvus:
+  enabled: false
diff --git a/helm-charts/data-prep/milvus-values.yaml b/helm-charts/data-prep/milvus-values.yaml
new file mode 100644
index 000000000..3a3cb76b7
--- /dev/null
+++ b/helm-charts/data-prep/milvus-values.yaml
@@ -0,0 +1,33 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Value overrides for running data-prep with Milvus instead of redis-vector-db.
+# This is a YAML-formatted file.
+# Enables the milvus and tei dependencies and disables redis-vector-db.
+milvus:
+  enabled: true
+  cluster:
+    enabled: false
+  etcd:
+    replicaCount: 1
+  pulsar:
+    enabled: false
+  minio:
+    mode: standalone
+redis-vector-db:
+  enabled: false
+tei:
+  enabled: true
+
+image:
+  repository: opea/dataprep-milvus
+
+port: 6010  # used as the containerPort and as the Service targetPort by the templates
+# text embedding inference service URL, e.g. http://<service-name>:<port>
+#TEI_EMBEDDING_ENDPOINT: "http://embedding-tei:80"
+# Milvus DB configuration; when MILVUS_HOST is unset the configmap template uses "<release>-milvus"
+#MILVUS_HOST: "milvustest"
+MILVUS_PORT: "19530"
+COLLECTION_NAME: "rag_milvus"
+MOSEC_EMBEDDING_ENDPOINT: ""  # when non-empty, the configmap emits the MOSEC_* settings instead of the TEI endpoint
+MOSEC_EMBEDDING_MODEL: ""
diff --git a/helm-charts/data-prep/templates/_helpers.tpl b/helm-charts/data-prep/templates/_helpers.tpl
new file mode 100644
index 000000000..8d2062396
--- /dev/null
+++ b/helm-charts/data-prep/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name (or nameOverride when set), truncated to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "data-prep.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name: fullnameOverride when set, otherwise "<release>-<name>".
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If the release name already contains the chart name, the release name alone is used as the full name.
+*/}}
+{{- define "data-prep.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined as "<name>-<version>" for the helm.sh/chart label ("+" is replaced by "_").
+*/}}
+{{- define "data-prep.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (when set) and the managing service.
+*/}}
+{{- define "data-prep.labels" -}}
+helm.sh/chart: {{ include "data-prep.chart" . }}
+{{ include "data-prep.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels, used by the Deployment selector, the pod template labels and the Service selector.
+*/}}
+{{- define "data-prep.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "data-prep.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: serviceAccount.name, defaulting to the fullname when serviceAccount.create is set and to "default" otherwise. NOTE(review): none of this chart's templates in this change include the helper - confirm values.yaml defines serviceAccount before using it.
+*/}}
+{{- define "data-prep.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "data-prep.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/data-prep/templates/configmap.yaml b/helm-charts/data-prep/templates/configmap.yaml
new file mode 100644
index 000000000..e0306c0b1
--- /dev/null
+++ b/helm-charts/data-prep/templates/configmap.yaml
@@ -0,0 +1,54 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ printf "%s-config" (include "data-prep.fullname" .) }}
+  labels:
+    {{- include "data-prep.labels" . | nindent 4 }}
+data:
+  {{- if .Values.MOSEC_EMBEDDING_ENDPOINT }}{{/* embedding backend precedence: MOSEC, then explicit TEI, then in-release TEI */}}
+  MOSEC_EMBEDDING_ENDPOINT: {{ quote .Values.MOSEC_EMBEDDING_ENDPOINT }}
+  MOSEC_EMBEDDING_MODEL: {{ quote .Values.MOSEC_EMBEDDING_MODEL }}
+  {{- else if .Values.TEI_EMBEDDING_ENDPOINT }}
+  TEI_ENDPOINT: {{ quote .Values.TEI_EMBEDDING_ENDPOINT }}
+  TEI_EMBEDDING_ENDPOINT: {{ quote .Values.TEI_EMBEDDING_ENDPOINT }}
+  {{- else if not .Values.LOCAL_EMBEDDING_MODEL }}
+  TEI_ENDPOINT: {{ printf "http://%s-tei" .Release.Name | quote }}
+  {{- end }}
+  {{- with .Values.LOCAL_EMBEDDING_MODEL }}
+  EMBED_MODEL: {{ quote . }}
+  LOCAL_EMBEDDING_MODEL: {{ quote . }}
+  {{- end }}
+  {{- with .Values.REDIS_URL }}
+  REDIS_URL: {{ quote . }}
+  {{- else }}
+  REDIS_URL: {{ printf "redis://%s-redis-vector-db:6379" .Release.Name | quote }}
+  {{- end }}
+  INDEX_NAME: {{ quote .Values.INDEX_NAME }}
+  KEY_INDEX_NAME: {{ quote .Values.KEY_INDEX_NAME }}
+  SEARCH_BATCH_SIZE: {{ quote .Values.SEARCH_BATCH_SIZE }}
+  {{- with .Values.MILVUS_HOST }}
+  MILVUS_HOST: {{ quote . }}
+  {{- else }}
+  MILVUS_HOST: {{ printf "%s-milvus" .Release.Name | quote }}
+  {{- end }}
+  MILVUS: {{ quote .Values.MILVUS_HOST }}{{/* NOTE(review): uses the raw value, not the "<release>-milvus" fallback above - confirm intended */}}
+  MILVUS_PORT: {{ quote .Values.MILVUS_PORT }}
+  {{- with .Values.COLLECTION_NAME }}
+  COLLECTION_NAME: {{ quote . }}
+  {{- end }}
+  HUGGINGFACEHUB_API_TOKEN: {{ quote .Values.global.HUGGINGFACEHUB_API_TOKEN }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- with .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ quote . }}
+  {{- end }}
+  http_proxy: {{ quote .Values.global.http_proxy }}
+  https_proxy: {{ quote .Values.global.https_proxy }}
+  {{- if and (not .Values.REDIS_URL) (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ quote .Values.global.no_proxy }}
+  {{- end }}
+  LOGFLAG: {{ quote .Values.LOGFLAG }}
diff --git a/helm-charts/data-prep/templates/deployment.yaml b/helm-charts/data-prep/templates/deployment.yaml
new file mode 100644
index 000000000..30faff173
--- /dev/null
+++ b/helm-charts/data-prep/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "data-prep.fullname" . }}
+  labels:
+    {{- include "data-prep.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "data-prep.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "data-prep.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "data-prep.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          ports:
+            - name: data-prep
+              protocol: TCP
+              containerPort: {{ .Values.port }}
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - name: tmp
+              mountPath: /tmp
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "data-prep.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/data-prep/templates/service.yaml b/helm-charts/data-prep/templates/service.yaml
new file mode 100644
index 000000000..afeff3ecf
--- /dev/null
+++ b/helm-charts/data-prep/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "data-prep.fullname" . }}
+  labels:
+    {{- include "data-prep.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "data-prep.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: data-prep
+      protocol: TCP
+      port: {{ .Values.service.port }}
+      targetPort: {{ .Values.port }}
diff --git a/helm-charts/data-prep/templates/tests/test-pod.yaml b/helm-charts/data-prep/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..127fa1167
--- /dev/null
+++ b/helm-charts/data-prep/templates/tests/test-pod.yaml
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "data-prep.fullname" . }}-testpod
+  labels:
+    {{- include "data-prep.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    # "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"  # uncomment to delete the test pod when the hook finishes
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          echo "test file" > /tmp/file1.txt;
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep -sS --fail-with-body \
+            -X POST \
+            -H "Content-Type: multipart/form-data" \
+            -F "files=@/tmp/file1.txt" && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi  # fail before cleanup when the upload never succeeded
+          curl http://{{ include "data-prep.fullname" . }}:{{ .Values.service.port }}/v1/dataprep/delete_file -sS \
+          -X POST \
+          -H "Content-Type: application/json" \
+          -d '{"file_path": "file1.txt"}' || true
+  restartPolicy: Never
diff --git a/helm-charts/data-prep/values.yaml b/helm-charts/data-prep/values.yaml
new file mode 100644
index 000000000..e35274fcb
--- /dev/null
+++ b/helm-charts/data-prep/values.yaml
@@ -0,0 +1,115 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for data-prep.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: false
+milvus:
+  enabled: false
+redis-vector-db:
+  enabled: false
+
+replicaCount: 1
+
+image:
+  repository: opea/dataprep-redis
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+port: 6007
+service:
+  type: ClusterIP
+  port: 6007
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: data-prep
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: data-prep
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: data-prep
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+# text embedding inference service URL, e.g. http://<service-name>:<port>
+TEI_EMBEDDING_ENDPOINT: ""
+
+# local embedder's model
+LOCAL_EMBEDDING_MODEL: ""
+
+# redis DB service URL, e.g. redis://<service-name>:<port>
+REDIS_URL: ""
+INDEX_NAME: "rag-redis"
+KEY_INDEX_NAME: "file-keys"
+SEARCH_BATCH_SIZE: 10
+
+# milvus DB configurations
+MILVUS_HOST: ""
+MILVUS_PORT: ""
+COLLECTION_NAME: ""
+MOSEC_EMBEDDING_ENDPOINT: ""
+MOSEC_EMBEDDING_MODEL: ""
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
diff --git a/helm-charts/embedding-usvc/.helmignore b/helm-charts/embedding-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/embedding-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/embedding-usvc/Chart.yaml b/helm-charts/embedding-usvc/Chart.yaml
new file mode 100644
index 000000000..7edaba721
--- /dev/null
+++ b/helm-charts/embedding-usvc/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: embedding-usvc
+description: The Helm chart for deploying embedding as microservice
+type: application
+version: 0-latest
+# The embedding microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tei
+    version: 0-latest
+    repository: file://../tei
+    condition: tei.enabled
diff --git a/helm-charts/embedding-usvc/README.md b/helm-charts/embedding-usvc/README.md
new file mode 100644
index 000000000..2bc0ed4bd
--- /dev/null
+++ b/helm-charts/embedding-usvc/README.md
@@ -0,0 +1,52 @@
+# embedding-usvc
+
+Helm chart for deploying embedding microservice.
+
+embedding-usvc depends on a TEI (text embedding inference) service; set TEI_EMBEDDING_ENDPOINT to that service's URL.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tei chart, please refer to the [tei](../tei) chart for more information.
+
+After you've deployed the tei chart successfully, please run `kubectl get svc` to get the tei service endpoint, i.e. `http://tei`.
+
+To install the embedding-usvc chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/embedding-usvc
+export TEI_EMBEDDING_ENDPOINT="http://tei"
+helm dependency update
+helm install embedding-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/embedding-usvc
+helm dependency update
+helm install embedding-usvc . --set tei.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/embedding-usvc 6000:6000` to expose the embedding-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:6000/v1/embeddings \
+    -X POST \
+    -d '{"text":"hello"}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                    | Type   | Default                | Description |
+| ---------------------- | ------ | ---------------------- | ----------- |
+| image.repository       | string | `"opea/embedding-tei"` |             |
+| service.port           | string | `"6000"`               |             |
+| TEI_EMBEDDING_ENDPOINT | string | `""`                   |             |
+| global.monitoring      | bool   | `false`                |             |
diff --git a/helm-charts/embedding-usvc/ci-values.yaml b/helm-charts/embedding-usvc/ci-values.yaml
new file mode 100644
index 000000000..543c69570
--- /dev/null
+++ b/helm-charts/embedding-usvc/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI override values for embedding-usvc (enables the bundled tei dependency).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: true
diff --git a/helm-charts/embedding-usvc/templates/_helpers.tpl b/helm-charts/embedding-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..229f1a5b5
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name, or .Values.nameOverride when set, cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "embedding-usvc.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name, truncated to 63 chars (some Kubernetes name fields are limited to this by the DNS naming spec) with a trailing "-" trimmed.
+Precedence: .Values.fullnameOverride when set; otherwise the release name alone if it already contains the chart name
+(or .Values.nameOverride); otherwise "<release name>-<name>".
+*/}}
+{{- define "embedding-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/* Value of the chart label: "<chart name>-<chart version>" with "+" replaced by "_", capped at 63 characters.
+*/}}
+{{- define "embedding-usvc.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, app version (only when .Chart.AppVersion is set) and the managing service.
+*/}}
+{{- define "embedding-usvc.labels" -}}
+helm.sh/chart: {{ include "embedding-usvc.chart" . }}
+{{ include "embedding-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name + release instance); shared by the Deployment selector, pod labels, Service selector and ServiceMonitor selector.
+*/}}
+{{- define "embedding-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "embedding-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use: .Values.serviceAccount.name when set; otherwise the chart fullname if serviceAccount.create is true, else "default".
+*/}}
+{{- define "embedding-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "embedding-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/embedding-usvc/templates/configmap.yaml b/helm-charts/embedding-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..5ec5904ad
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/configmap.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}-config
+  labels:
+    {{- include "embedding-usvc.labels" . | nindent 4 }}
+# Environment for the embedding container; the Deployment loads every key through envFrom.
+data:
+  # Explicit TEI endpoint when provided, otherwise the TEI service named after this release.
+  TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | default (printf "http://%s-tei" .Release.Name) | quote }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  # When the release-local TEI is used and a proxy is configured, exempt its service name from proxying.
+  {{- if and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tei,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  # A non-empty value enables the logging facility; "" disables it.
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/embedding-usvc/templates/deployment.yaml b/helm-charts/embedding-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..67b2fae99
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the embedding-usvc container, configured from the chart ConfigMap
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}
+  labels:
+    {{- include "embedding-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "embedding-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:  # same helper as spec.selector.matchLabels above
+        {{- include "embedding-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "embedding-usvc.fullname" . }}-config  # rendered by templates/configmap.yaml
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true  # a missing extra ConfigMap does not block the pod
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"  # tag falls back to the chart appVersion
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: embedding-usvc  # port name referenced by the probes in values.yaml
+              containerPort: 6000  # same number as targetPort in templates/service.yaml
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # backed by the emptyDir volume "tmp" declared below
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:  # rendered only when .Values.evenly_distributed is set
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname  # spread by node hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "embedding-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/embedding-usvc/templates/service.yaml b/helm-charts/embedding-usvc/templates/service.yaml
new file mode 100644
index 000000000..2aff873f2
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}
+  labels:
+    {{- include "embedding-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "embedding-usvc.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: embedding-usvc  # port name the ServiceMonitor endpoint refers to
+      protocol: TCP
+      port: {{ .Values.service.port }}
+      targetPort: 6000  # containerPort declared in templates/deployment.yaml
diff --git a/helm-charts/embedding-usvc/templates/servicemonitor.yaml b/helm-charts/embedding-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..ea26f6cc2
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name from global values
+spec:
+  endpoints:
+    - interval: 5s
+      port: embedding-usvc  # named port of the Service in templates/service.yaml
+  selector:
+    matchLabels:
+      {{- include "embedding-usvc.selectorLabels" . | nindent 6 }}
+{{- end }}
diff --git a/helm-charts/embedding-usvc/templates/tests/test-pod.yaml b/helm-charts/embedding-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..7acc267b3
--- /dev/null
+++ b/helm-charts/embedding-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "embedding-usvc.fullname" . }}-testpod
+  annotations:
+    helm.sh/hook: test  # Helm test hook
+    # helm.sh/hook-delete-policy: "hook-succeeded, hook-failure"
+  labels:
+    {{- include "embedding-usvc.labels" . | nindent 4 }}
+spec:
+  restartPolicy: Never
+  containers:
+    - name: curl
+      image: python:3.10.14  # must provide bash and curl, which the script below invokes
+      command: ["bash", "-c"]
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "embedding-usvc.fullname" . }}:{{ .Values.service.port }}/v1/embeddings -sS --fail-with-body \
+            -X POST \
+            -d '{"text":"hello"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
diff --git a/helm-charts/embedding-usvc/values.yaml b/helm-charts/embedding-usvc/values.yaml
new file mode 100644
index 000000000..66f79d789
--- /dev/null
+++ b/helm-charts/embedding-usvc/values.yaml
@@ -0,0 +1,98 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for embedding-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: false
+
+replicaCount: 1
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+TEI_EMBEDDING_ENDPOINT: ""
+image:
+  repository: opea/embedding-tei
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for embedding service is 6000
+  port: 6000
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: embedding-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: embedding-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: embedding-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/gpt-sovits/.helmignore b/helm-charts/gpt-sovits/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/gpt-sovits/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/gpt-sovits/Chart.yaml b/helm-charts/gpt-sovits/Chart.yaml
new file mode 100644
index 000000000..65d83a9dc
--- /dev/null
+++ b/helm-charts/gpt-sovits/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: gpt-sovits
+description: The Helm chart for deploying gpt-sovits as microservice
+type: application
+version: 0-latest
+# The gpt-sovits microservice server version
+appVersion: "1.0"
diff --git a/helm-charts/gpt-sovits/README.md b/helm-charts/gpt-sovits/README.md
new file mode 100644
index 000000000..9f8cdd7ab
--- /dev/null
+++ b/helm-charts/gpt-sovits/README.md
@@ -0,0 +1,44 @@
+# gpt-sovits
+
+Helm chart for deploying gpt-sovits microservice.
+
+## Install the chart
+
+```console
+cd GenAIInfra/helm-charts/common/
+helm install gpt-sovits gpt-sovits
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/gpt-sovits 9880:9880` to expose the gpt-sovits service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+- Chinese only
+
+```bash
+curl localhost:9880/ -XPOST -d '{
+    "text": "先帝创业未半而中道崩殂，今天下三分，益州疲弊，此诚危急存亡之秋也。",
+    "text_language": "zh"
+}' --output out.wav
+```
+
+- English only
+
+```bash
+curl localhost:9880/ -XPOST -d '{
+    "text": "Discuss the evolution of text-to-speech (TTS) technology from its early beginnings to the present day. Highlight the advancements in natural language processing that have contributed to more realistic and human-like speech synthesis. Also, explore the various applications of TTS in education, accessibility, and customer service, and predict future trends in this field. Write a comprehensive overview of text-to-speech (TTS) technology.",
+    "text_language": "en"
+}' --output out.wav
+```
+
+## Values
+
+| Key              | Type   | Default             | Description |
+| ---------------- | ------ | ------------------- | ----------- |
+| image.repository | string | `"opea/gpt-sovits"` |             |
+| service.port     | string | `"9880"`            |             |
+| TTS_ENDPOINT     | string | `""`                |             |
diff --git a/helm-charts/gpt-sovits/ci-values.yaml b/helm-charts/gpt-sovits/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/gpt-sovits/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/gpt-sovits/templates/_helpers.tpl b/helm-charts/gpt-sovits/templates/_helpers.tpl
new file mode 100644
index 000000000..f42c32caa
--- /dev/null
+++ b/helm-charts/gpt-sovits/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name, or .Values.nameOverride when set, cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "gpt-sovits.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name, truncated to 63 chars (some Kubernetes name fields are limited to this by the DNS naming spec) with a trailing "-" trimmed.
+Precedence: .Values.fullnameOverride when set; otherwise the release name alone if it already contains the chart name
+(or .Values.nameOverride); otherwise "<release name>-<name>".
+*/}}
+{{- define "gpt-sovits.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/* Value of the chart label: "<chart name>-<chart version>" with "+" replaced by "_", capped at 63 characters.
+*/}}
+{{- define "gpt-sovits.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, app version (only when .Chart.AppVersion is set) and the managing service.
+*/}}
+{{- define "gpt-sovits.labels" -}}
+helm.sh/chart: {{ include "gpt-sovits.chart" . }}
+{{ include "gpt-sovits.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name + release instance); shared by the Deployment selector, pod labels and Service selector.
+*/}}
+{{- define "gpt-sovits.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "gpt-sovits.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use: .Values.serviceAccount.name when set; otherwise the chart fullname if serviceAccount.create is true, else "default". NOTE(review): this chart's values.yaml has no serviceAccount section - confirm before including this helper.
+*/}}
+{{- define "gpt-sovits.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "gpt-sovits.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/gpt-sovits/templates/configmap.yaml b/helm-charts/gpt-sovits/templates/configmap.yaml
new file mode 100644
index 000000000..898f0bfe4
--- /dev/null
+++ b/helm-charts/gpt-sovits/templates/configmap.yaml
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "gpt-sovits.fullname" . }}-config
+  labels:
+    {{- include "gpt-sovits.labels" . | nindent 4 }}
+data:
+  XDG_CACHE_HOME: "/tmp/.cache"  # all cache paths sit under /tmp, which the Deployment mounts as an emptyDir
+  HF_HOME: "/tmp/.cache/huggingface"
+  NUMBA_CACHE_DIR: "/tmp/.cache/numba"
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
diff --git a/helm-charts/gpt-sovits/templates/deployment.yaml b/helm-charts/gpt-sovits/templates/deployment.yaml
new file mode 100644
index 000000000..d5d6e9d90
--- /dev/null
+++ b/helm-charts/gpt-sovits/templates/deployment.yaml
@@ -0,0 +1,94 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the gpt-sovits container, configured from the chart ConfigMap
+metadata:
+  name: {{ include "gpt-sovits.fullname" . }}
+  labels:
+    {{- include "gpt-sovits.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "gpt-sovits.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:  # same helper as spec.selector.matchLabels above
+        {{- include "gpt-sovits.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "gpt-sovits.fullname" . }}-config  # rendered by templates/configmap.yaml
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true  # a missing extra ConfigMap does not block the pod
+            {{- end }}
+          securityContext:  # left empty when global.modelUseHostPath is set, otherwise taken from values
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"  # tag falls back to the chart appVersion
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: gpt-sovits  # port name referenced by the tcpSocket probes in values.yaml
+              containerPort: 9880  # same number as targetPort in templates/service.yaml
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # backed by the emptyDir volume "tmp"; the ConfigMap points caches here
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:  # rendered only when .Values.evenly_distributed is set
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname  # spread by node hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "gpt-sovits.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/gpt-sovits/templates/service.yaml b/helm-charts/gpt-sovits/templates/service.yaml
new file mode 100644
index 000000000..ba55efb36
--- /dev/null
+++ b/helm-charts/gpt-sovits/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gpt-sovits.fullname" . }}
+  labels:
+    {{- include "gpt-sovits.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "gpt-sovits.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: gpt-sovits
+      protocol: TCP
+      port: {{ .Values.service.port }}
+      targetPort: 9880  # containerPort declared in templates/deployment.yaml
diff --git a/helm-charts/gpt-sovits/templates/tests/test-pod.yaml b/helm-charts/gpt-sovits/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..4d13086b4
--- /dev/null
+++ b/helm-charts/gpt-sovits/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "gpt-sovits.fullname" . }}-testpod
+  annotations:
+    helm.sh/hook: test  # Helm test hook
+    # helm.sh/hook-delete-policy: "hook-succeeded, hook-failure"
+  labels:
+    {{- include "gpt-sovits.labels" . | nindent 4 }}
+spec:
+  restartPolicy: Never
+  containers:
+    - name: curl
+      image: python:3.10.14  # must provide bash and curl, which the script below invokes
+      command: ["bash", "-c"]
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "gpt-sovits.fullname" . }}:{{ .Values.service.port }}/ \
+            -X POST \
+            -d '{"text": "Discuss the evolution of text-to-speech.", "text_language": "en"}' \
+            --output /tmp/out.wav && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
diff --git a/helm-charts/gpt-sovits/values.yaml b/helm-charts/gpt-sovits/values.yaml
new file mode 100644
index 000000000..d5cff30d7
--- /dev/null
+++ b/helm-charts/gpt-sovits/values.yaml
@@ -0,0 +1,83 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for gpt-sovits.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: opea/gpt-sovits
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  allowPrivilegeEscalation: false
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  port: 9880
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  tcpSocket:
+    port: gpt-sovits
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  timeoutSeconds: 4
+  failureThreshold: 24
+readinessProbe:
+  tcpSocket:
+    port: gpt-sovits
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  timeoutSeconds: 4
+startupProbe:
+  tcpSocket:
+    port: gpt-sovits
+  initialDelaySeconds: 10
+  periodSeconds: 5
+  failureThreshold: 120
+  timeoutSeconds: 2
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/guardrails-usvc/.helmignore b/helm-charts/guardrails-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/guardrails-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/guardrails-usvc/Chart.yaml b/helm-charts/guardrails-usvc/Chart.yaml
new file mode 100644
index 000000000..1f229fe41
--- /dev/null
+++ b/helm-charts/guardrails-usvc/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: guardrails-usvc
+description: The Helm chart for deploying guardrails-usvc as microservice
+type: application
+version: 0-latest
+appVersion: "v1.0"
+dependencies:
+  - name: tgi
+    version: 0-latest
+    alias: tgi-guardrails
+    repository: file://../tgi
+    condition: tgi-guardrails.enabled
diff --git a/helm-charts/guardrails-usvc/README.md b/helm-charts/guardrails-usvc/README.md
new file mode 100644
index 000000000..b5656d04d
--- /dev/null
+++ b/helm-charts/guardrails-usvc/README.md
@@ -0,0 +1,57 @@
+# guardrails-usvc
+
+Helm chart for deploying the guardrails microservice.
+
+guardrails-usvc depends on TGI; set `SAFETY_GUARD_ENDPOINT` to the TGI endpoint.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information. Please use model `meta-llama/Meta-Llama-Guard-2-8B` during installation.
+
+After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, e.g. `http://tgi`.
+
+To install the chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/guardrails-usvc
+export HFTOKEN="insert-your-huggingface-token-here"
+export SAFETY_GUARD_ENDPOINT="http://tgi"
+export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
+helm dependency update
+helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --wait
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/guardrails-usvc
+export HFTOKEN="insert-your-huggingface-token-here"
+helm dependency update
+helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi-guardrails.enabled=true --wait
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/guardrails-usvc 9090:9090` to expose the guardrails-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9090/v1/guardrails \
+    -X POST \
+    -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                             | Type   | Default                              | Description                                                                                                                                                  |
+| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`                                 | Your own Hugging Face API token                                                                                                                              |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`                 | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory |
+| image.repository                | string | `"opea/guardrails-tgi"`              |                                                                                                                                                              |
+| service.port                    | string | `"9090"`                             |                                                                                                                                                              |
+| SAFETY_GUARD_ENDPOINT           | string | `""`                                 | LLM endpoint                                                                                                                                                 |
+| SAFETY_GUARD_MODEL_ID           | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using                                                                                                             |
diff --git a/helm-charts/guardrails-usvc/ci-values.yaml b/helm-charts/guardrails-usvc/ci-values.yaml
new file mode 100644
index 000000000..3aef2fce5
--- /dev/null
+++ b/helm-charts/guardrails-usvc/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI values for guardrails-usvc (overrides values.yaml).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi-guardrails:
+  enabled: true
diff --git a/helm-charts/guardrails-usvc/templates/_helpers.tpl b/helm-charts/guardrails-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..088f88455
--- /dev/null
+++ b/helm-charts/guardrails-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "guardrails-usvc.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "guardrails-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "guardrails-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "guardrails-usvc.labels" -}}
+helm.sh/chart: {{ include "guardrails-usvc.chart" . }}
+{{ include "guardrails-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "guardrails-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "guardrails-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
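+Create the name of the service account to use. NOTE(review): reads .Values.serviceAccount, which this chart's values.yaml does not define; confirm before including this helper.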
+*/}}
+{{- define "guardrails-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "guardrails-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/guardrails-usvc/templates/configmap.yaml b/helm-charts/guardrails-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..86cc30efc
--- /dev/null
+++ b/helm-charts/guardrails-usvc/templates/configmap.yaml
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}-config
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+data:
+  {{- if .Values.SAFETY_GUARD_ENDPOINT }}
+  SAFETY_GUARD_ENDPOINT: {{ tpl .Values.SAFETY_GUARD_ENDPOINT . | quote }}
+  {{- else }}{{/* no endpoint configured: use the release-scoped tgi-guardrails host name */}}
+  SAFETY_GUARD_ENDPOINT: {{ printf "http://%s-tgi-guardrails" .Release.Name | quote }}
+  {{- end }}
+  SAFETY_GUARD_MODEL_ID: {{ .Values.SAFETY_GUARD_MODEL_ID | quote }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
+  {{- with .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ . | quote }}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.SAFETY_GUARD_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}{{/* proxy set and fallback endpoint in use: exempt that host from proxying */}}
+  no_proxy: "{{ .Release.Name }}-tgi-guardrails,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
diff --git a/helm-charts/guardrails-usvc/templates/deployment.yaml b/helm-charts/guardrails-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..bebf86e59
--- /dev/null
+++ b/helm-charts/guardrails-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "guardrails-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "guardrails-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "guardrails-usvc.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          ports:
+            - name: guardrails-usvc  # port name referenced by the probes in values.yaml
+              containerPort: 9090
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # writable scratch space; HF_HOME in the ConfigMap points under /tmp
+              name: tmp
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}{{/* kept as "if": the body needs the root context for include */}}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "guardrails-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/guardrails-usvc/templates/service.yaml b/helm-charts/guardrails-usvc/templates/service.yaml
new file mode 100644
index 000000000..594312f03
--- /dev/null
+++ b/helm-charts/guardrails-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "guardrails-usvc.fullname" . }}
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port exposed by the Service (service.port in values.yaml)
+      targetPort: 9090  # fixed; matches the containerPort declared in deployment.yaml
+      protocol: TCP
+      name: guardrails-usvc
+  selector:
+    {{- include "guardrails-usvc.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml b/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..ec077d430
--- /dev/null
+++ b/helm-charts/guardrails-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "guardrails-usvc.fullname" . }}-testpod"
+  labels:
+    {{- include "guardrails-usvc.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # Helm test hook: the pod is created by `helm test`, not on install
+    # Disabled: uncomment to have Helm delete the test pod after it finishes.
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']  # bash rather than sh: the script below uses (( )) and [[ ]]
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "guardrails-usvc.fullname" . }}:{{ .Values.service.port }}/v1/guardrails -sS --fail-with-body \
+              -X POST \
+              -d '{"text":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}' \
+              -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/guardrails-usvc/values.yaml b/helm-charts/guardrails-usvc/values.yaml
new file mode 100644
index 000000000..56a0cc6d4
--- /dev/null
+++ b/helm-charts/guardrails-usvc/values.yaml
@@ -0,0 +1,96 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for guardrails-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi-guardrails:  # values for the tgi dependency (aliased as tgi-guardrails in Chart.yaml)
+  enabled: false  # set to true to deploy the dependency together with this chart
+  LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
+
+replicaCount: 1
+
+# TGI service endpoint. When empty, the ConfigMap falls back to http://<release-name>-tgi-guardrails.
+SAFETY_GUARD_ENDPOINT: ""
+# Guard model ID, exported to the container as SAFETY_GUARD_MODEL_ID.
+SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+image:
+  repository: opea/guardrails-tgi
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true  # the deployment mounts an emptyDir at /tmp as writable space
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  port: 9090  # Service port; the container port is fixed at 9090 in the templates
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: guardrails-usvc  # named container port declared in deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 failures x 5s period = about 2 minutes of failures tolerated
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: guardrails-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: guardrails-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures x 5s period = up to about 10 minutes allowed for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"  # placeholder; override with --set global.HUGGINGFACEHUB_API_TOKEN=<token>
diff --git a/helm-charts/llm-uservice/.helmignore b/helm-charts/llm-uservice/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/llm-uservice/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/llm-uservice/Chart.yaml b/helm-charts/llm-uservice/Chart.yaml
new file mode 100644
index 000000000..aefefd516
--- /dev/null
+++ b/helm-charts/llm-uservice/Chart.yaml
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: llm-uservice
+description: The Helm chart for deploying llm as microservice
+type: application
+version: 0-latest
+# The llm microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tgi
+    version: 0-latest
+    repository: file://../tgi
+    condition: tgi.enabled
+  - name: vllm
+    version: 0-latest
+    repository: file://../vllm
+    condition: vllm.enabled
diff --git a/helm-charts/llm-uservice/README.md b/helm-charts/llm-uservice/README.md
new file mode 100644
index 000000000..a1a8d6d47
--- /dev/null
+++ b/helm-charts/llm-uservice/README.md
@@ -0,0 +1,55 @@
+# llm-uservice
+
+Helm chart for deploying LLM microservice.
+
+llm-uservice depends on an LLM serving backend (TGI or vLLM); set `TGI_LLM_ENDPOINT` (or `vLLM_ENDPOINT`) to that backend's endpoint.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information.
+
+After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, e.g. `http://tgi`.
+
+To install the chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/llm-uservice
+export HFTOKEN="insert-your-huggingface-token-here"
+export TGI_LLM_ENDPOINT="http://tgi"
+helm dependency update
+helm install llm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} --wait
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/llm-uservice
+export HFTOKEN="insert-your-huggingface-token-here"
+helm dependency update
+helm install llm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/llm-uservice 9000:9000` to expose the llm-uservice service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9000/v1/chat/completions \
+    -X POST \
+    -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                             | Type   | Default          | Description                     |
+| ------------------------------- | ------ | ---------------- | ------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`             | Your own Hugging Face API token |
+| image.repository                | string | `"opea/llm-tgi"` |                                 |
+| service.port                    | string | `"9000"`         |                                 |
+| TGI_LLM_ENDPOINT                | string | `""`             | LLM endpoint                    |
+| global.monitoring               | bool   | `false`          | Service usage metrics           |
diff --git a/helm-charts/llm-uservice/ci-docsum-values.yaml b/helm-charts/llm-uservice/ci-docsum-values.yaml
new file mode 100644
index 000000000..b9f269c5a
--- /dev/null
+++ b/helm-charts/llm-uservice/ci-docsum-values.yaml
@@ -0,0 +1,8 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-docsum-tgi
+  tag: "latest"
+tgi:
+  enabled: true
diff --git a/helm-charts/llm-uservice/ci-faqgen-values.yaml b/helm-charts/llm-uservice/ci-faqgen-values.yaml
new file mode 100644
index 000000000..f7f3f5a55
--- /dev/null
+++ b/helm-charts/llm-uservice/ci-faqgen-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-faqgen-tgi
+  tag: "latest"
+
+tgi:
+  enabled: true
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/helm-charts/llm-uservice/ci-values.yaml b/helm-charts/llm-uservice/ci-values.yaml
new file mode 100644
index 000000000..88eef5b4a
--- /dev/null
+++ b/helm-charts/llm-uservice/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI values for llm-uservice (overrides values.yaml).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: true
diff --git a/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml b/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml
new file mode 100644
index 000000000..2438eaed9
--- /dev/null
+++ b/helm-charts/llm-uservice/ci-vllm-gaudi-values.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI values for llm-uservice with vLLM on Gaudi (overrides values.yaml).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+tgi:
+  enabled: false
+vllm:
+  enabled: true
+  image:
+    repository: opea/vllm-gaudi
+    tag: "latest"
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  OMPI_MCA_btl_vader_single_copy_mechanism: none
+  extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
+  resources:
+    limits:
+      habana.ai/gaudi: 1
+
+vLLM_ENDPOINT: ""
+LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+image:
+  repository: opea/llm-vllm
+  tag: "latest"
diff --git a/helm-charts/llm-uservice/templates/_helpers.tpl b/helm-charts/llm-uservice/templates/_helpers.tpl
new file mode 100644
index 000000000..d67db64c2
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "llm-uservice.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "llm-uservice.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "llm-uservice.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "llm-uservice.labels" -}}
+helm.sh/chart: {{ include "llm-uservice.chart" . }}
+{{ include "llm-uservice.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "llm-uservice.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "llm-uservice.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "llm-uservice.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "llm-uservice.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/llm-uservice/templates/configmap.yaml b/helm-charts/llm-uservice/templates/configmap.yaml
new file mode 100644
index 000000000..bd49777dc
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/configmap.yaml
@@ -0,0 +1,36 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}-config
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+data:
+  {{- if .Values.TGI_LLM_ENDPOINT }}
+  TGI_LLM_ENDPOINT: {{ .Values.TGI_LLM_ENDPOINT | quote }}
+  {{- else }}{{/* no endpoint configured: use the release-scoped tgi host name */}}
+  TGI_LLM_ENDPOINT: {{ printf "http://%s-tgi" .Release.Name | quote }}
+  {{- end }}
+  {{- if .Values.vLLM_ENDPOINT }}
+  vLLM_ENDPOINT: {{ .Values.vLLM_ENDPOINT | quote }}
+  {{- else }}{{/* no endpoint configured: use the release-scoped vllm host name */}}
+  vLLM_ENDPOINT: {{ printf "http://%s-vllm" .Release.Name | quote }}
+  {{- end }}
+  {{- with .Values.LLM_MODEL_ID }}
+  LLM_MODEL: {{ . | quote }}
+  {{- end }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- with .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ . | quote }}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if or .Values.global.http_proxy .Values.global.https_proxy }}{{/* proxy set: exempt both release-scoped backend hosts */}}
+  no_proxy: "{{ .Release.Name }}-tgi,{{ .Release.Name }}-vllm,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/llm-uservice/templates/deployment.yaml b/helm-charts/llm-uservice/templates/deployment.yaml
new file mode 100644
index 000000000..dfa4e6022
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "llm-uservice.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "llm-uservice.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "llm-uservice.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          ports:
+            - name: llm-uservice  # port name also used by the ServiceMonitor endpoint
+              containerPort: 9000
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # writable scratch space; HF_HOME in the ConfigMap points under /tmp
+              name: tmp
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}{{/* kept as "if": the body needs the root context for include */}}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "llm-uservice.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/llm-uservice/templates/service.yaml b/helm-charts/llm-uservice/templates/service.yaml
new file mode 100644
index 000000000..06e13b8c6
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port exposed by the Service (service.port in values)
+      targetPort: 9000  # fixed; matches the containerPort declared in deployment.yaml
+      protocol: TCP
+      name: llm-uservice  # port name referenced by servicemonitor.yaml
+  selector:
+    {{- include "llm-uservice.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/llm-uservice/templates/servicemonitor.yaml b/helm-charts/llm-uservice/templates/servicemonitor.yaml
new file mode 100644
index 000000000..ecb83fc34
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}{{/* rendered only when global.monitoring is enabled */}}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "llm-uservice.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease | quote }}  # quoted: label values must be strings, even for values like "true" or "123"
+spec:
+  selector:
+    matchLabels:
+      {{- include "llm-uservice.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: llm-uservice  # named Service port declared in service.yaml
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/llm-uservice/templates/tests/test-pod.yaml b/helm-charts/llm-uservice/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..e5012fc75
--- /dev/null
+++ b/helm-charts/llm-uservice/templates/tests/test-pod.yaml
@@ -0,0 +1,44 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "llm-uservice.fullname" . }}-testpod"
+  labels:
+    {{- include "llm-uservice.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # Helm test hook
+    # Optional cleanup (disabled); valid policy names are hook-succeeded and hook-failed: "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:  # endpoint and payload are picked at render time from image.repository; retries only while curl exits with code 7, any other failure aborts
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+          {{- if contains "llm-docsum-tgi" .Values.image.repository }}
+          # Try with docsum endpoint
+            curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/docsum -sS --fail-with-body \
+              -X POST \
+              -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' \
+              -H 'Content-Type: application/json' && break;
+          {{- else if contains "llm-faqgen-tgi" .Values.image.repository }}
+          # Try with faqgen endpoint
+            curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
+              -X POST \
+              -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.","max_tokens":17}' \
+              -H 'Content-Type: application/json' && break;
+          {{- else }}
+            curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
+              -X POST \
+              -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+              -H 'Content-Type: application/json' && break;
+          {{- end }}
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # one-shot pod: a failed test is not restarted
diff --git a/helm-charts/llm-uservice/values.yaml b/helm-charts/llm-uservice/values.yaml
new file mode 100644
index 000000000..8908bb74c
--- /dev/null
+++ b/helm-charts/llm-uservice/values.yaml
@@ -0,0 +1,105 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for llm-uservice.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: false
+vllm:
+  enabled: false
+
+replicaCount: 1
+# For tgi
+TGI_LLM_ENDPOINT: ""
+# For vllm, set the LLM_MODEL_ID the same as vllm sub chart
+vLLM_ENDPOINT: ""
+LLM_MODEL_ID: ""
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+image:
+  repository: opea/llm-tgi
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for llm service is 9000
+  port: 9000
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: llm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: llm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: llm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/llm-uservice/variant_docsum-values.yaml b/helm-charts/llm-uservice/variant_docsum-values.yaml
new file mode 100644
index 000000000..9e1f33bde
--- /dev/null
+++ b/helm-charts/llm-uservice/variant_docsum-values.yaml
@@ -0,0 +1,6 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-docsum-tgi
+  tag: "latest"
diff --git a/helm-charts/llm-uservice/variant_faqgen-values.yaml b/helm-charts/llm-uservice/variant_faqgen-values.yaml
new file mode 100644
index 000000000..4e51fdd1d
--- /dev/null
+++ b/helm-charts/llm-uservice/variant_faqgen-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/llm-faqgen-tgi
+  tag: "latest"
+
+tgi:
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/helm-charts/lvm-uservice/.helmignore b/helm-charts/lvm-uservice/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/lvm-uservice/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/lvm-uservice/Chart.yaml b/helm-charts/lvm-uservice/Chart.yaml
new file mode 100644
index 000000000..66375e7ae
--- /dev/null
+++ b/helm-charts/lvm-uservice/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: lvm-uservice
+description: The Helm chart for deploying lvm as microservice
+type: application
+version: 0-latest
+# The lvm microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tgi
+    version: 0-latest
+    repository: file://../tgi
+    condition: tgi.enabled
diff --git a/helm-charts/lvm-uservice/README.md b/helm-charts/lvm-uservice/README.md
new file mode 100644
index 000000000..d8bfcd6b0
--- /dev/null
+++ b/helm-charts/lvm-uservice/README.md
@@ -0,0 +1,55 @@
+# lvm-uservice
+
+Helm chart for deploying LVM microservice.
+
+lvm-uservice depends on TGI; set LVM_ENDPOINT to the TGI endpoint.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tgi chart, please refer to the [tgi](../tgi) chart for more information.
+
+After you've deployed the tgi chart successfully, please run `kubectl get svc` to get the tgi service endpoint, i.e. `http://tgi`.
+
+To install the chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/lvm-uservice
+export HFTOKEN="insert-your-huggingface-token-here"
+export LVM_ENDPOINT="http://tgi"
+helm dependency update
+helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/lvm-uservice
+export HFTOKEN="insert-your-huggingface-token-here"
+helm dependency update
+helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set tgi.enabled=true --wait
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/lvm-uservice 9399:9399` to expose the lvm-uservice service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9399/v1/lvm \
+    -X POST \
+    -d '{"image":"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC","prompt":"What is this?"}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                             | Type   | Default          | Description                     |
+| ------------------------------- | ------ | ---------------- | ------------------------------- |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `""`             | Your own Hugging Face API token |
+| image.repository                | string | `"opea/lvm-tgi"` |                                 |
+| service.port                    | string | `"9399"`         |                                 |
+| LVM_ENDPOINT                    | string | `""`             | LVM endpoint                    |
+| global.monitoring               | bool   | `false`          | Service usage metrics           |
diff --git a/helm-charts/lvm-uservice/ci-values.yaml b/helm-charts/lvm-uservice/ci-values.yaml
new file mode 100644
index 000000000..a4c378251
--- /dev/null
+++ b/helm-charts/lvm-uservice/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for lvm-uservice.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: true
diff --git a/helm-charts/lvm-uservice/templates/_helpers.tpl b/helm-charts/lvm-uservice/templates/_helpers.tpl
new file mode 100644
index 000000000..86c26b0e6
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "lvm-uservice.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "lvm-uservice.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "lvm-uservice.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "lvm-uservice.labels" -}}
+helm.sh/chart: {{ include "lvm-uservice.chart" . }}
+{{ include "lvm-uservice.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "lvm-uservice.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "lvm-uservice.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "lvm-uservice.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "lvm-uservice.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/lvm-uservice/templates/configmap.yaml b/helm-charts/lvm-uservice/templates/configmap.yaml
new file mode 100644
index 000000000..62f75d323
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/configmap.yaml
@@ -0,0 +1,28 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "lvm-uservice.fullname" . }}-config  # loaded by the Deployment through envFrom
+  labels:
+    {{- include "lvm-uservice.labels" . | nindent 4 }}
+data:
+  {{- if .Values.LVM_ENDPOINT }}
+  LVM_ENDPOINT: {{ .Values.LVM_ENDPOINT | quote}}  # explicit endpoint supplied in values
+  {{- else }}
+  LVM_ENDPOINT: "http://{{ .Release.Name }}-tgi"  # fallback host <release>-tgi; presumably the tgi subchart's Service - confirm
+  {{- end }}
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
+  HF_HOME: "/tmp/.cache/huggingface"  # under /tmp, which the Deployment mounts as an emptyDir
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}  # emitted only when global.HF_ENDPOINT is set
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.LVM_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tgi,{{ .Values.global.no_proxy }}"  # fallback endpoint in use behind a proxy: exempt the <release>-tgi host
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/lvm-uservice/templates/deployment.yaml b/helm-charts/lvm-uservice/templates/deployment.yaml
new file mode 100644
index 000000000..c276087d6
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "lvm-uservice.fullname" . }}
+  labels:
+    {{- include "lvm-uservice.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "lvm-uservice.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "lvm-uservice.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}  # container is named after the Helm release
+          envFrom:  # environment comes from the chart ConfigMap, plus an optional user-supplied one
+            - configMapRef:
+                name: {{ include "lvm-uservice.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true  # a missing extra ConfigMap does not block pod startup
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"  # tag falls back to the chart appVersion
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: lvm-uservice  # probes in values.yaml refer to this port by name
+              containerPort: 9399  # must match targetPort in service.yaml
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # writable scratch space; HF_HOME from the ConfigMap lives under /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:  # opt-in via .Values.evenly_distributed
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname  # spread replicas across nodes
+          whenUnsatisfiable: ScheduleAnyway  # soft constraint: pods still schedule when the skew cannot be met
+          labelSelector:
+            matchLabels:
+              {{- include "lvm-uservice.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/lvm-uservice/templates/service.yaml b/helm-charts/lvm-uservice/templates/service.yaml
new file mode 100644
index 000000000..37e454dec
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    {{- include "lvm-uservice.labels" . | nindent 4 }}
+  name: {{ include "lvm-uservice.fullname" . }}
+spec:
+  selector:  # pods carrying this chart's selector labels
+    {{- include "lvm-uservice.selectorLabels" . | nindent 4 }}
+  type: {{ .Values.service.type }}
+  ports:
+    - name: lvm-uservice  # referenced by name from the ServiceMonitor endpoint
+      protocol: TCP
+      port: {{ .Values.service.port }}  # Service-facing port, set via values
+      targetPort: 9399  # container port declared in the Deployment
diff --git a/helm-charts/lvm-uservice/templates/servicemonitor.yaml b/helm-charts/lvm-uservice/templates/servicemonitor.yaml
new file mode 100644
index 000000000..9fe58419b
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor  # rendered only when global.monitoring is set
+metadata:
+  name: {{ include "lvm-uservice.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name taken from global.prometheusRelease
+spec:
+  selector:  # matches on this chart's selector labels
+    matchLabels:
+      {{- include "lvm-uservice.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: lvm-uservice  # named port declared in this chart's Service
+    interval: 5s  # scrape interval
+{{- end }}
diff --git a/helm-charts/lvm-uservice/templates/tests/test-pod.yaml b/helm-charts/lvm-uservice/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..7782cdb1e
--- /dev/null
+++ b/helm-charts/lvm-uservice/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "lvm-uservice.fullname" . }}-testpod"
+  labels:
+    {{- include "lvm-uservice.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # Helm test hook
+    # Optional cleanup (disabled); valid policy names are hook-succeeded and hook-failed: "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:  # POSTs a small base64 image to /v1/lvm; retries only while curl exits with code 7, any other failure aborts
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "lvm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/lvm -sS --fail-with-body \
+              -X POST \
+              -d '{"image":"iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC","prompt":"What is this?"}' \
+              -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # one-shot pod: a failed test is not restarted
diff --git a/helm-charts/lvm-uservice/values.yaml b/helm-charts/lvm-uservice/values.yaml
new file mode 100644
index 000000000..75188a7e0
--- /dev/null
+++ b/helm-charts/lvm-uservice/values.yaml
@@ -0,0 +1,102 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for lvm-uservice.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tgi:
+  enabled: false
+  LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf
+  MAX_INPUT_LENGTH: "4096"
+  MAX_TOTAL_TOKENS: "8192"
+
+replicaCount: 1
+LVM_ENDPOINT: ""
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+image:
+  repository: opea/lvm-tgi
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for lvm service is 9399
+  port: 9399
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: lvm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: lvm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: lvm-uservice
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/mongodb/.helmignore b/helm-charts/mongodb/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/mongodb/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/mongodb/Chart.yaml b/helm-charts/mongodb/Chart.yaml
new file mode 100644
index 000000000..a93f27403
--- /dev/null
+++ b/helm-charts/mongodb/Chart.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: mongodb
+description: The Helm chart for MongoDB
+type: application
+version: 0-latest
+appVersion: "7.0.11"  # same as the default image.tag in values.yaml
diff --git a/helm-charts/mongodb/README.md b/helm-charts/mongodb/README.md
new file mode 100644
index 000000000..eebb49c04
--- /dev/null
+++ b/helm-charts/mongodb/README.md
@@ -0,0 +1,28 @@
+# mongodb
+
+Helm chart for deploying MongoDB service.
+
+## Install the Chart
+
+To install the chart, run the following:
+
+```console
+cd ${GenAIInfra_repo}/helm-charts/common
+helm install mongodb mongodb
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all the mongo pods are running.
+
+Then run the command `kubectl port-forward svc/mongodb 27017:27017` to expose the mongodb service for access.
+
+Open another terminal and run the command `mongo --eval 'db.runCommand("ping").ok' localhost:27017/test --quiet` to test mongodb access. The `mongo` command should return `1`.
+
+## Values
+
+| Key              | Type   | Default    | Description              |
+| ---------------- | ------ | ---------- | ------------------------ |
+| image.repository | string | `"mongo"`  |                          |
+| image.tag        | string | `"7.0.11"` |                          |
+| service.port     | string | `"27017"`  | The mongodb service port |
diff --git a/helm-charts/mongodb/ci-values.yaml b/helm-charts/mongodb/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/mongodb/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/mongodb/templates/_helpers.tpl b/helm-charts/mongodb/templates/_helpers.tpl
new file mode 100644
index 000000000..6b1b8c179
--- /dev/null
+++ b/helm-charts/mongodb/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "mongodb.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "mongodb.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "mongodb.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "mongodb.labels" -}}
+helm.sh/chart: {{ include "mongodb.chart" . }}
+{{ include "mongodb.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "mongodb.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "mongodb.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "mongodb.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "mongodb.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/mongodb/templates/deployment.yaml b/helm-charts/mongodb/templates/deployment.yaml
new file mode 100644
index 000000000..4fca11ffd
--- /dev/null
+++ b/helm-charts/mongodb/templates/deployment.yaml
@@ -0,0 +1,78 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "mongodb.fullname" . }}
+  labels:
+    {{- include "mongodb.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "mongodb.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "mongodb.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"  # tag falls back to the chart appVersion
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          volumeMounts:
+            - mountPath: /data  # backed by the emptyDir below, so data does not outlive the pod
+              name: data-volume
+            - mountPath: /tmp  # writable scratch space; values.yaml sets readOnlyRootFilesystem: true
+              name: tmp
+          ports:
+            - name: port  # the startupProbe in values.yaml refers to this port by name
+              containerPort: 27017  # must match targetPort in service.yaml
+              protocol: TCP
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: data-volume
+          emptyDir: {}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:  # opt-in via .Values.evenly_distributed
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname  # spread replicas across nodes
+          whenUnsatisfiable: ScheduleAnyway  # soft constraint: pods still schedule when the skew cannot be met
+          labelSelector:
+            matchLabels:
+              {{- include "mongodb.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/mongodb/templates/service.yaml b/helm-charts/mongodb/templates/service.yaml
new file mode 100644
index 000000000..979628dbd
--- /dev/null
+++ b/helm-charts/mongodb/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    {{- include "mongodb.labels" . | nindent 4 }}
+  name: {{ include "mongodb.fullname" . }}
+spec:
+  selector:  # pods carrying this chart's selector labels
+    {{- include "mongodb.selectorLabels" . | nindent 4 }}
+  type: {{ .Values.service.type }}
+  ports:
+    - name: mongodb
+      protocol: TCP
+      port: {{ .Values.service.port }}  # Service-facing port, set via values
+      targetPort: 27017  # container port declared in the Deployment
diff --git a/helm-charts/mongodb/templates/tests/test-pod.yaml b/helm-charts/mongodb/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..8236679ba
--- /dev/null
+++ b/helm-charts/mongodb/templates/tests/test-pod.yaml
@@ -0,0 +1,22 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "mongodb.fullname" . }}-testpod
+  labels:
+    {{- include "mongodb.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # Helm test hook
+    # Optional cleanup (disabled); valid policy names are hook-succeeded and hook-failed: "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: mongoclient
+      image: mongoclient/mongoclient:latest
+      command: ['sh', '-c']
+      args:  # pings the MongoDB Service on service.port; set -x echoes the command into the pod log
+        - |
+          set -x
+          mongo --eval 'db.runCommand("ping").ok' {{ include "mongodb.fullname" . }}:{{ .Values.service.port }}/test --quiet
+  restartPolicy: Never  # one-shot pod: a failed test is not restarted
diff --git a/helm-charts/mongodb/values.yaml b/helm-charts/mongodb/values.yaml
new file mode 100644
index 000000000..cb38a714d
--- /dev/null
+++ b/helm-charts/mongodb/values.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+image:
+  repository: mongo
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "7.0.11"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true  # runAsUser below is a fixed non-zero UID, so let the kubelet enforce non-root
+  runAsUser: 999
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  port: 27017
+
+startupProbe:
+  tcpSocket:
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
diff --git a/helm-charts/prompt-usvc/.helmignore b/helm-charts/prompt-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/prompt-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/prompt-usvc/Chart.yaml b/helm-charts/prompt-usvc/Chart.yaml
new file mode 100644
index 000000000..a564d48c5
--- /dev/null
+++ b/helm-charts/prompt-usvc/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: prompt-usvc
+description: The Helm chart for deploying prompt as microservice
+type: application
+version: 0-latest
+# The prompt microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: mongodb
+    version: 0-latest
+    repository: file://../mongodb
+    condition: mongodb.enabled
diff --git a/helm-charts/prompt-usvc/README.md b/helm-charts/prompt-usvc/README.md
new file mode 100644
index 000000000..38b240ed1
--- /dev/null
+++ b/helm-charts/prompt-usvc/README.md
@@ -0,0 +1,54 @@
+# prompt-usvc
+
+Helm chart for deploying prompt-usvc microservice.
+
+prompt-usvc requires a MongoDB service; specify its endpoint with the `MONGO_HOST` and `MONGO_PORT` values.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the mongodb chart, please refer to the [mongodb](../mongodb) for more information.
+
+After you've deployed the mongodb chart successfully, run `kubectl get svc` to get the service host and port, e.g. `mongodb:27017`.
+
+To install prompt-usvc chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/prompt-usvc
+export MONGO_HOST="mongodb"
+export MONGO_PORT="27017"
+helm dependency update
+helm install prompt-usvc . --set MONGO_HOST=${MONGO_HOST} --set MONGO_PORT=${MONGO_PORT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/prompt-usvc
+helm dependency update
+helm install prompt-usvc . --set mongodb.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/prompt-usvc 6018:6018` to expose the prompt-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl -X 'POST' \
+  http://localhost:6018/v1/prompt/create \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{"prompt_text": "test prompt", "user": "test"}';
+```
+
+## Values
+
+| Key              | Type   | Default                              | Description |
+| ---------------- | ------ | ------------------------------------ | ----------- |
+| image.repository | string | `"opea/promptregistry-mongo-server"` |             |
+| service.port     | string | `"6018"`                             |             |
+| MONGO_HOST       | string | `""`                                 |             |
+| MONGO_PORT       | string | `"27017"`                            |             |
diff --git a/helm-charts/prompt-usvc/ci-values.yaml b/helm-charts/prompt-usvc/ci-values.yaml
new file mode 100644
index 000000000..1e0d5c386
--- /dev/null
+++ b/helm-charts/prompt-usvc/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI values for prompt-usvc: enable the bundled mongodb dependency.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+mongodb:
+  enabled: true
diff --git a/helm-charts/prompt-usvc/templates/_helpers.tpl b/helm-charts/prompt-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..147787115
--- /dev/null
+++ b/helm-charts/prompt-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "prompt-usvc.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "prompt-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "prompt-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "prompt-usvc.labels" -}}
+helm.sh/chart: {{ include "prompt-usvc.chart" . }}
+{{ include "prompt-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "prompt-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "prompt-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Resolve the service account name: serviceAccount.name if set, else the chart fullname when serviceAccount.create is truthy or "default" otherwise. NOTE(review): this chart's values.yaml defines no serviceAccount key - confirm one is supplied before including this helper.
+*/}}
+{{- define "prompt-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "prompt-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/prompt-usvc/templates/configmap.yaml b/helm-charts/prompt-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..e78bba220
--- /dev/null
+++ b/helm-charts/prompt-usvc/templates/configmap.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "prompt-usvc.fullname" . }}-config
+  labels:
+    {{- include "prompt-usvc.labels" . | nindent 4 }}
+data:
+  {{- /* Environment for the prompt-usvc container; the deployment loads this ConfigMap via envFrom. */}}
+  {{- /* Fallback MongoDB host: assumes the bundled mongodb subchart's Service is named "<release>-mongodb". */}}
+  {{- $bundledMongo := printf "%s-mongodb" .Release.Name }}
+  MONGO_HOST: {{ .Values.MONGO_HOST | default $bundledMongo | quote }}
+  {{- /* MONGO_PORT falls back to 27017 when the value is unset or empty. */}}
+  MONGO_PORT: {{ .Values.MONGO_PORT | default "27017" | quote }}
+  DB_NAME: {{ .Values.DB_NAME | quote }}
+  COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }}
+  {{- /* Proxy settings are passed through unchanged from the global values. */}}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- /* When the fallback MongoDB host is used and a proxy is configured, exempt that host from proxying. */}}
+  {{- /* compact drops an empty global.no_proxy so the joined list never ends in a stray comma. */}}
+  {{- if and (not .Values.MONGO_HOST) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: {{ list $bundledMongo .Values.global.no_proxy | compact | join "," | quote }}
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  {{- /* LOGFLAG is passed through as-is; values.yaml documents that a non-empty string enables logging. */}}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/prompt-usvc/templates/deployment.yaml b/helm-charts/prompt-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..836668384
--- /dev/null
+++ b/helm-charts/prompt-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "prompt-usvc.fullname" . }}
+  labels:
+    {{- include "prompt-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "prompt-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "prompt-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "prompt-usvc.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: port
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "prompt-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/prompt-usvc/templates/service.yaml b/helm-charts/prompt-usvc/templates/service.yaml
new file mode 100644
index 000000000..f22105fd8
--- /dev/null
+++ b/helm-charts/prompt-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "prompt-usvc.fullname" . }}
+  labels:
+    {{- include "prompt-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.port }}
+      protocol: TCP
+      name: port
+  selector:
+    {{- include "prompt-usvc.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/prompt-usvc/templates/tests/test-pod.yaml b/helm-charts/prompt-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..70d1ea3ed
--- /dev/null
+++ b/helm-charts/prompt-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "prompt-usvc.fullname" . }}-testpod
+  labels:
+    {{- include "prompt-usvc.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          set -x
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -X 'POST' \
+              http://{{ include "prompt-usvc.fullname" . }}:{{ .Values.service.port }}/v1/prompt/create -sS --fail-with-body \
+              -H 'accept: application/json' \
+              -H 'Content-Type: application/json' \
+              -d '{"prompt_text": "test prompt", "user": "test"}' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/prompt-usvc/values.yaml b/helm-charts/prompt-usvc/values.yaml
new file mode 100644
index 000000000..a475e6f5e
--- /dev/null
+++ b/helm-charts/prompt-usvc/values.yaml
@@ -0,0 +1,99 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for prompt-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+mongodb:
+  enabled: false
+
+replicaCount: 1
+
+image:
+  repository: opea/promptregistry-mongo-server
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  port: 6018
+
+# Change the port to 6012 if you are using 1.0 docker images
+# https://github.com/opea-project/GenAIComps/pull/740
+port: 6018
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: port
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+# MongoDB connection settings. When MONGO_HOST is empty, the ConfigMap template falls back to "<release-name>-mongodb".
+MONGO_HOST: ""
+MONGO_PORT: 27017
+DB_NAME: "OPEA"
+COLLECTION_NAME: "Prompt"
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/redis-vector-db/.helmignore b/helm-charts/redis-vector-db/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/redis-vector-db/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/redis-vector-db/Chart.yaml b/helm-charts/redis-vector-db/Chart.yaml
new file mode 100644
index 000000000..6e43a25d0
--- /dev/null
+++ b/helm-charts/redis-vector-db/Chart.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: redis-vector-db
+description: The Helm chart for Redis Vector DB
+type: application
+version: 0-latest
+appVersion: "7.2.0-v9"
diff --git a/helm-charts/redis-vector-db/README.md b/helm-charts/redis-vector-db/README.md
new file mode 100644
index 000000000..108367c92
--- /dev/null
+++ b/helm-charts/redis-vector-db/README.md
@@ -0,0 +1,29 @@
+# redis-vector-db
+
+Helm chart for deploying Redis Vector DB service.
+
+## Install the Chart
+
+To install the chart, run the following:
+
+```console
+cd ${GenAIInfra_repo}/helm-charts/common
+helm install redis-vector-db redis-vector-db
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all the redis pods are running.
+
+Then run the command `kubectl port-forward svc/redis-vector-db 6379:6379` to expose the redis vector db service for access.
+
+Open another terminal and run the command `redis-cli -h 127.0.0.1 -p 6379 ping` to access the redis vector db. The `redis-cli` command should return `PONG`.
+
+## Values
+
+| Key                          | Type   | Default               | Description            |
+| ---------------------------- | ------ | --------------------- | ---------------------- |
+| image.repository             | string | `"redis/redis-stack"` |                        |
+| image.tag                    | string | `"7.2.0-v9"`          |                        |
+| service.port (redis-service) | string | `"6379"`              | The redis-service port |
+| service.port (redis-insight) | string | `"8001"`              | The redis-insight port |
diff --git a/helm-charts/redis-vector-db/ci-values.yaml b/helm-charts/redis-vector-db/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/redis-vector-db/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/redis-vector-db/templates/_helpers.tpl b/helm-charts/redis-vector-db/templates/_helpers.tpl
new file mode 100644
index 000000000..f8f7a65da
--- /dev/null
+++ b/helm-charts/redis-vector-db/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "redis-vector-db.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "redis-vector-db.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "redis-vector-db.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "redis-vector-db.labels" -}}
+helm.sh/chart: {{ include "redis-vector-db.chart" . }}
+{{ include "redis-vector-db.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "redis-vector-db.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "redis-vector-db.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Resolve the service account name: serviceAccount.name if set, else the chart fullname when serviceAccount.create is truthy or "default" otherwise. NOTE(review): this chart's values.yaml defines no serviceAccount key - confirm one is supplied before including this helper.
+*/}}
+{{- define "redis-vector-db.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "redis-vector-db.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/redis-vector-db/templates/deployment.yaml b/helm-charts/redis-vector-db/templates/deployment.yaml
new file mode 100644
index 000000000..d4fc692b6
--- /dev/null
+++ b/helm-charts/redis-vector-db/templates/deployment.yaml
@@ -0,0 +1,87 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "redis-vector-db.fullname" . }}
+  labels:
+    {{- include "redis-vector-db.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "redis-vector-db.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "redis-vector-db.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          volumeMounts:
+            - mountPath: /data
+              name: data-volume
+            - mountPath: /redisinsight
+              name: redisinsight-volume
+            - mountPath: /tmp
+              name: tmp
+          ports:
+          {{- $redisServicePort := index .Values.service.ports 0 }}
+            {{- range .Values.service.ports }}
+            - name: {{ .name }}
+              containerPort: {{ .targetPort }}
+              protocol: TCP
+            {{- end }}
+          startupProbe:
+            tcpSocket:
+              port: {{ $redisServicePort.targetPort }} # Probe the Redis port
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 120
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: data-volume
+          emptyDir: {}
+        - name: redisinsight-volume
+          emptyDir: {}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "redis-vector-db.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/redis-vector-db/templates/service.yaml b/helm-charts/redis-vector-db/templates/service.yaml
new file mode 100644
index 000000000..d656f04ce
--- /dev/null
+++ b/helm-charts/redis-vector-db/templates/service.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "redis-vector-db.fullname" . }}
+  labels:
+    {{- include "redis-vector-db.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  selector:
+    {{- include "redis-vector-db.selectorLabels" . | nindent 4 }}
+  ports: {{- /* one Service port per entry in .Values.service.ports */}}
+    {{- range $svcPort := .Values.service.ports }}
+    - name: {{ $svcPort.name }}
+      protocol: {{ $svcPort.protocol }}
+      port: {{ $svcPort.port }}
+      targetPort: {{ $svcPort.targetPort }}
+    {{- end }}
diff --git a/helm-charts/redis-vector-db/templates/tests/test-pod.yaml b/helm-charts/redis-vector-db/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..d694c1f63
--- /dev/null
+++ b/helm-charts/redis-vector-db/templates/tests/test-pod.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ include "redis-vector-db.fullname" . }}-testpod
+  labels:
+    {{- include "redis-vector-db.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: redis:7.2-alpine
+      command: ['sh', '-c']
+      args:
+        - |
+          redis-cli -h {{ include "redis-vector-db.fullname" . }} \
+                    {{- with (first .Values.service.ports) }}
+                    -p {{ .port }} \
+                    {{- end }}
+                    ping
+  restartPolicy: Never
diff --git a/helm-charts/redis-vector-db/values.yaml b/helm-charts/redis-vector-db/values.yaml
new file mode 100644
index 000000000..d07339c3d
--- /dev/null
+++ b/helm-charts/redis-vector-db/values.yaml
@@ -0,0 +1,63 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+image:
+  repository: redis/redis-stack
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "7.2.0-v9"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  ports:
+  - name: redis-service
+    port: 6379
+    targetPort: 6379
+    protocol: TCP
+  - name: redis-insight
+    port: 8001
+    targetPort: 8001
+    protocol: TCP
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
diff --git a/helm-charts/reranking-usvc/.helmignore b/helm-charts/reranking-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/reranking-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/reranking-usvc/Chart.yaml b/helm-charts/reranking-usvc/Chart.yaml
new file mode 100644
index 000000000..a7f722882
--- /dev/null
+++ b/helm-charts/reranking-usvc/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: reranking-usvc
+description: The Helm chart for deploying reranking as microservice
+type: application
+version: 0-latest
+# The reranking microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: teirerank
+    version: 0-latest
+    repository: file://../teirerank
+    condition: teirerank.enabled
diff --git a/helm-charts/reranking-usvc/README.md b/helm-charts/reranking-usvc/README.md
new file mode 100644
index 000000000..bf77f7f3c
--- /dev/null
+++ b/helm-charts/reranking-usvc/README.md
@@ -0,0 +1,52 @@
+# reranking-usvc
+
+Helm chart for deploying reranking microservice.
+
+reranking-usvc depends on teirerank, set the TEI_RERANKING_ENDPOINT as teirerank endpoint.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the teirerank chart, please refer to the [teirerank](../teirerank) chart for more information.
+
+After you've deployed the teirerank chart successfully, run `kubectl get svc` to get the TEI service endpoint, e.g. `http://teirerank`.
+
+To install the reranking-usvc chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/reranking-usvc
+export TEI_RERANKING_ENDPOINT="http://teirerank"
+helm dependency update
+helm install reranking-usvc . --set TEI_RERANKING_ENDPOINT=${TEI_RERANKING_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/reranking-usvc
+helm dependency update
+helm install reranking-usvc . --set teirerank.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/reranking-usvc 8000:8000` to expose the reranking-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:8000/v1/reranking \
+    -X POST \
+    -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                    | Type   | Default                | Description |
+| ---------------------- | ------ | ---------------------- | ----------- |
+| image.repository       | string | `"opea/reranking-tgi"` |             |
+| TEI_RERANKING_ENDPOINT | string | `""`                   |             |
+| service.port           | string | `"8000"`               |             |
+| global.monitoring      | bool   | `false`                |             |
diff --git a/helm-charts/reranking-usvc/ci-values.yaml b/helm-charts/reranking-usvc/ci-values.yaml
new file mode 100644
index 000000000..1118483f4
--- /dev/null
+++ b/helm-charts/reranking-usvc/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Override values (ci-values.yaml) for reranking-usvc.
+# This is a YAML-formatted file.
+# Enables the teirerank dependency, which values.yaml leaves disabled by default.
+
+teirerank:
+  enabled: true
diff --git a/helm-charts/reranking-usvc/templates/_helpers.tpl b/helm-charts/reranking-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..9247fe13b
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name (or nameOverride when set), capped at 63 characters with no trailing "-".
+*/}}
+{{- define "reranking-usvc.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name: fullnameOverride when set; otherwise the release name,
+suffixed with the chart name (or nameOverride) unless the release name already contains it.
+The result is cut to 63 chars, the limit on some Kubernetes name fields (DNS naming spec).
+*/}}
+{{- define "reranking-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := .Values.nameOverride | default .Chart.Name }}
+{{- if not (contains $name .Release.Name) }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined with "-", as used by the helm.sh/chart label ("+" becomes "_", max 63 chars).
+*/}}
+{{- define "reranking-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, selector labels, app version (only when set) and the managing service.
+*/}}
+{{- define "reranking-usvc.labels" -}}
+helm.sh/chart: {{ include "reranking-usvc.chart" . }}
+{{ include "reranking-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (chart name + release instance) used by the Deployment, Service and ServiceMonitor selectors.
+*/}}
+{{- define "reranking-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "reranking-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use (requires .Values.serviceAccount, which values.yaml does not define by default).
+*/}}
+{{- define "reranking-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "reranking-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/reranking-usvc/templates/configmap.yaml b/helm-charts/reranking-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..69b1fcd91
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/configmap.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "reranking-usvc.fullname" . }}-config
+  labels:
+    {{- include "reranking-usvc.labels" . | nindent 4 }}
+data:
+  {{- with .Values.TEI_RERANKING_ENDPOINT }}
+  TEI_RERANKING_ENDPOINT: {{ . | quote }}
+  {{- else }}
+  TEI_RERANKING_ENDPOINT: "http://{{ .Release.Name }}-teirerank"
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if or .Values.TEI_RERANKING_ENDPOINT (not (or .Values.global.http_proxy .Values.global.https_proxy)) }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- else }}
+  no_proxy: "{{ .Release.Name }}-teirerank,{{ .Values.global.no_proxy }}"
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/reranking-usvc/templates/deployment.yaml b/helm-charts/reranking-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..50abfc5a0
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "reranking-usvc.fullname" . }}
+  labels:
+    {{- include "reranking-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "reranking-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- if .Values.podAnnotations }}
+      annotations:
+        {{- toYaml .Values.podAnnotations | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "reranking-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml .Values.imagePullSecrets | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "reranking-usvc.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          ports:
+            - name: reranking-usvc
+              containerPort: 8000
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- if .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- end }}
+      {{- if .Values.affinity }}
+      affinity:
+        {{- toYaml .Values.affinity | nindent 8 }}
+      {{- end }}
+      {{- if .Values.tolerations }}
+      tolerations:
+        {{- toYaml .Values.tolerations | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "reranking-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/reranking-usvc/templates/service.yaml b/helm-charts/reranking-usvc/templates/service.yaml
new file mode 100644
index 000000000..2b1bb7afa
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "reranking-usvc.fullname" . }}
+  labels:
+    {{- include "reranking-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 8000  # matches the containerPort hard-coded in deployment.yaml
+      protocol: TCP
+      name: reranking-usvc  # port name referenced by servicemonitor.yaml
+  selector:
+    {{- include "reranking-usvc.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/reranking-usvc/templates/servicemonitor.yaml b/helm-charts/reranking-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..8d1306edf
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "reranking-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name; values.yaml default is prometheus-stack
+spec:
+  selector:
+    matchLabels:
+      {{- include "reranking-usvc.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: reranking-usvc  # named Service port defined in service.yaml
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/reranking-usvc/templates/tests/test-pod.yaml b/helm-charts/reranking-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..4bd7ac745
--- /dev/null
+++ b/helm-charts/reranking-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "reranking-usvc.fullname" . }}-testpod"
+  labels:
+    {{- include "reranking-usvc.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # pod is created only when `helm test` is run
+    # To remove the test pod afterwards, add: "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "reranking-usvc.fullname" . }}:{{ .Values.service.port }}/v1/reranking -sS --fail-with-body \
+            -X POST \
+            -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/reranking-usvc/values.yaml b/helm-charts/reranking-usvc/values.yaml
new file mode 100644
index 000000000..924815ff7
--- /dev/null
+++ b/helm-charts/reranking-usvc/values.yaml
@@ -0,0 +1,98 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for reranking-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+teirerank:
+  enabled: false
+
+replicaCount: 1
+
+# Set to any non-empty string (e.g. "true") to enable the logging facility;
+# keep it as "" to disable it.
+LOGFLAG: ""
+
+TEI_RERANKING_ENDPOINT: ""  # when empty, configmap.yaml falls back to http://<release-name>-teirerank
+image:
+  repository: opea/reranking-tei
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # Port exposed by the Service; traffic is forwarded to container port 8000 (targetPort in service.yaml)
+  port: 8000
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: reranking-usvc  # named container port from deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 failures x 5s period = 120s of consecutive failures tolerated
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: reranking-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: reranking-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures x 5s period = up to 600s allowed for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/retriever-usvc/.helmignore b/helm-charts/retriever-usvc/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/retriever-usvc/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/retriever-usvc/Chart.yaml b/helm-charts/retriever-usvc/Chart.yaml
new file mode 100644
index 000000000..0bb54c3c0
--- /dev/null
+++ b/helm-charts/retriever-usvc/Chart.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: retriever-usvc
+description: The Helm chart for deploying retriever as microservice
+type: application
+version: 0-latest
+# The retriever microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tei
+    version: 0-latest
+    repository: file://../tei
+    condition: tei.enabled  # pulled in only when tei.enabled is true
+  - name: redis-vector-db
+    version: 0-latest
+    repository: file://../redis-vector-db
+    condition: redis-vector-db.enabled
+  - name: milvus
+    version: 4.2.12  # pinned remote chart; the other dependencies are sibling local charts
+    repository: https://zilliztech.github.io/milvus-helm/
+    condition: milvus.enabled
diff --git a/helm-charts/retriever-usvc/README.md b/helm-charts/retriever-usvc/README.md
new file mode 100644
index 000000000..1eba680c3
--- /dev/null
+++ b/helm-charts/retriever-usvc/README.md
@@ -0,0 +1,59 @@
+# retriever-usvc
+
+Helm chart for deploying Retriever microservice.
+
+retriever-usvc depends on redis and tei; you should set these endpoints before installing the chart.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tei and redis-vector-db chart, refer to the [tei](../tei/README.md) and [redis-vector-db](../redis-vector-db/README.md) for more information.
+
+After you've deployed the tei and redis-vector-db chart successfully, run `kubectl get svc` to get the service endpoint and URL respectively, i.e. `http://tei`, `redis://redis-vector-db:6379`.
+
+To install retriever-usvc chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/retriever-usvc
+export REDIS_URL="redis://redis-vector-db:6379"
+export TEI_EMBEDDING_ENDPOINT="http://tei"
+helm dependency update
+helm install retriever-usvc . --set REDIS_URL=${REDIS_URL} --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/retriever-usvc
+helm dependency update
+helm install retriever-usvc . --set tei.enabled=true --set redis-vector-db.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/retriever-usvc 7000:7000` to expose the retriever-usvc service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+curl http://localhost:7000/v1/retrieval  \
+    -X POST \
+    -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                    | Type   | Default                  | Description |
+| ---------------------- | ------ | ------------------------ | ----------- |
+| image.repository       | string | `"opea/retriever-redis"` |             |
+| service.port           | int    | `7000`                   |             |
+| REDIS_URL              | string | `""`                     |             |
+| TEI_EMBEDDING_ENDPOINT | string | `""`                     |             |
+| global.monitoring      | bool   | `false`                  |             |
+
+## Milvus support
+
+Refer to the milvus-values.yaml for milvus configurations.
diff --git a/helm-charts/retriever-usvc/ci-values.yaml b/helm-charts/retriever-usvc/ci-values.yaml
new file mode 100644
index 000000000..cbc29c7ee
--- /dev/null
+++ b/helm-charts/retriever-usvc/ci-values.yaml
@@ -0,0 +1,13 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Override values (ci-values.yaml) for retriever-usvc.
+# This is a YAML-formatted file.
+# Enables the tei and redis-vector-db dependencies and keeps milvus disabled.
+
+tei:
+  enabled: true
+redis-vector-db:
+  enabled: true
+milvus:
+  enabled: false
diff --git a/helm-charts/retriever-usvc/milvus-values.yaml b/helm-charts/retriever-usvc/milvus-values.yaml
new file mode 100644
index 000000000..c186b4be2
--- /dev/null
+++ b/helm-charts/retriever-usvc/milvus-values.yaml
@@ -0,0 +1,33 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Milvus override values for retriever-usvc.
+# This is a YAML-formatted file.
+# Enables the milvus and tei dependencies and disables redis-vector-db.
+
+milvus:
+  enabled: true
+  cluster:
+    enabled: false
+  etcd:
+    replicaCount: 1
+  pulsar:
+    enabled: false
+  minio:
+    mode: standalone
+redis-vector-db:
+  enabled: false
+tei:
+  enabled: true
+
+image:
+  repository: opea/retriever-milvus
+port: 7000
+# text embedding inference service URL, e.g. http://<service-name>:<port>
+# TEI_EMBEDDING_ENDPOINT: "http://dataprep-tei:80"
+# milvus DB configurations
+# MILVUS_HOST: "dataprep-milvus"
+MILVUS_PORT: "19530"
+COLLECTION_NAME: "rag_milvus"
+MOSEC_EMBEDDING_ENDPOINT: ""
+MOSEC_EMBEDDING_MODEL: ""
diff --git a/helm-charts/retriever-usvc/templates/_helpers.tpl b/helm-charts/retriever-usvc/templates/_helpers.tpl
new file mode 100644
index 000000000..18c740303
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name (or nameOverride when set), capped at 63 characters with no trailing "-".
+*/}}
+{{- define "retriever-usvc.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name: fullnameOverride when set; otherwise the release name,
+suffixed with the chart name (or nameOverride) unless the release name already contains it.
+The result is cut to 63 chars, the limit on some Kubernetes name fields (DNS naming spec).
+*/}}
+{{- define "retriever-usvc.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := .Values.nameOverride | default .Chart.Name }}
+{{- if not (contains $name .Release.Name) }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined with "-", as used by the helm.sh/chart label ("+" becomes "_", max 63 chars).
+*/}}
+{{- define "retriever-usvc.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, selector labels, app version (only when set) and the managing service.
+*/}}
+{{- define "retriever-usvc.labels" -}}
+helm.sh/chart: {{ include "retriever-usvc.chart" . }}
+{{ include "retriever-usvc.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (chart name + release instance) used by the Deployment, Service and ServiceMonitor selectors.
+*/}}
+{{- define "retriever-usvc.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "retriever-usvc.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use (requires .Values.serviceAccount, which values.yaml does not define by default).
+*/}}
+{{- define "retriever-usvc.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "retriever-usvc.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/retriever-usvc/templates/configmap.yaml b/helm-charts/retriever-usvc/templates/configmap.yaml
new file mode 100644
index 000000000..6ada51029
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/configmap.yaml
@@ -0,0 +1,49 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "retriever-usvc.fullname" . }}-config
+  labels:
+    {{- include "retriever-usvc.labels" . | nindent 4 }}
+data:
+  {{- if .Values.MOSEC_EMBEDDING_ENDPOINT }}
+  MOSEC_EMBEDDING_ENDPOINT: {{ .Values.MOSEC_EMBEDDING_ENDPOINT | quote}}
+  MOSEC_EMBEDDING_MODEL: {{ .Values.MOSEC_EMBEDDING_MODEL | quote}}
+  {{- else if .Values.TEI_EMBEDDING_ENDPOINT }}
+  TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote }}
+  {{- else if not .Values.LOCAL_EMBEDDING_MODEL }}
+  TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei"  # fallback when no MOSEC/TEI endpoint and no local model is set
+  {{- end }}
+  {{- if .Values.LOCAL_EMBEDDING_MODEL }}
+  EMBED_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }}
+  LOCAL_EMBEDDING_MODEL: {{ .Values.LOCAL_EMBEDDING_MODEL | quote }}
+  {{- end }}
+  {{- if .Values.REDIS_URL }}
+  REDIS_URL: {{ .Values.REDIS_URL | quote}}
+  {{- else }}
+  REDIS_URL: "redis://{{ .Release.Name }}-redis-vector-db:6379"
+  {{- end }}
+  INDEX_NAME: {{ .Values.INDEX_NAME | quote }}
+  {{- if .Values.MILVUS_HOST }}
+  MILVUS_HOST: {{ .Values.MILVUS_HOST | quote }}
+  {{- else }}
+  MILVUS_HOST: "{{ .Release.Name }}-milvus"
+  {{- end }}
+  MILVUS: {{ .Values.MILVUS_HOST | quote }}  # NOTE(review): unlike MILVUS_HOST above, no release-name fallback here - confirm intended
+  MILVUS_PORT: {{ .Values.MILVUS_PORT | quote }}
+  {{- if .Values.COLLECTION_NAME }}
+  COLLECTION_NAME: {{ .Values.COLLECTION_NAME | quote }}
+  {{- end }}
+  EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.REDIS_URL) (and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy)) }}
+  no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-redis-vector-db,{{ .Values.global.no_proxy }}"  # default tei/redis hosts are excluded from the proxy
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/retriever-usvc/templates/deployment.yaml b/helm-charts/retriever-usvc/templates/deployment.yaml
new file mode 100644
index 000000000..b491b5d4d
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "retriever-usvc.fullname" . }}
+  labels:
+    {{- include "retriever-usvc.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "retriever-usvc.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- if .Values.podAnnotations }}
+      annotations:
+        {{- toYaml .Values.podAnnotations | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "retriever-usvc.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- if .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml .Values.imagePullSecrets | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "retriever-usvc.fullname" . }}-config
+            {{- with .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ . }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- with .Values.image.pullPolicy }}
+          imagePullPolicy: {{ . }}
+          {{- end }}
+          ports:
+            - name: retriever-usvc
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- if .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml .Values.nodeSelector | nindent 8 }}
+      {{- end }}
+      {{- if .Values.affinity }}
+      affinity:
+        {{- toYaml .Values.affinity | nindent 8 }}
+      {{- end }}
+      {{- if .Values.tolerations }}
+      tolerations:
+        {{- toYaml .Values.tolerations | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "retriever-usvc.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/retriever-usvc/templates/service.yaml b/helm-charts/retriever-usvc/templates/service.yaml
new file mode 100644
index 000000000..e127bf858
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "retriever-usvc.fullname" . }}
+  labels:
+    {{- include "retriever-usvc.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: {{ .Values.port }}  # same value as containerPort in deployment.yaml
+      protocol: TCP
+      name: retriever-usvc  # port name referenced by servicemonitor.yaml
+  selector:
+    {{- include "retriever-usvc.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/retriever-usvc/templates/servicemonitor.yaml b/helm-charts/retriever-usvc/templates/servicemonitor.yaml
new file mode 100644
index 000000000..2cfede645
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "retriever-usvc.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name; values.yaml default is prometheus-stack
+spec:
+  selector:
+    matchLabels:
+      {{- include "retriever-usvc.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: retriever-usvc  # named Service port defined in service.yaml
+    interval: 5s
+{{- end }}
diff --git a/helm-charts/retriever-usvc/templates/tests/test-pod.yaml b/helm-charts/retriever-usvc/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..3de01889d
--- /dev/null
+++ b/helm-charts/retriever-usvc/templates/tests/test-pod.yaml
@@ -0,0 +1,31 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "retriever-usvc.fullname" . }}-testpod"
+  labels:
+    {{- include "retriever-usvc.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # pod is created only when `helm test` is run
+    # To remove the test pod afterwards, add: "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # python3 is needed by the script below to build the random embedding
+      command: ['bash', '-c']
+      args:
+        - |
+          your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)");
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "retriever-usvc.fullname" . }}:{{ .Values.service.port }}/v1/retrieval  -sS --fail-with-body \
+            -X POST \
+            -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/retriever-usvc/values.yaml b/helm-charts/retriever-usvc/values.yaml
new file mode 100644
index 000000000..568f2c3f0
--- /dev/null
+++ b/helm-charts/retriever-usvc/values.yaml
@@ -0,0 +1,116 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for retriever-usvc.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: false
+milvus:
+  enabled: false
+redis-vector-db:
+  enabled: false
+
+replicaCount: 1
+
+# Set to any non-empty string (e.g. "true") to enable the logging facility;
+# keep it as "" to disable it.
+LOGFLAG: ""
+
+TEI_EMBEDDING_ENDPOINT: ""  # when empty (and no MOSEC endpoint or local model is set), configmap.yaml uses http://<release-name>-tei
+LOCAL_EMBEDDING_MODEL: ""  # when set, exported as both EMBED_MODEL and LOCAL_EMBEDDING_MODEL
+
+REDIS_URL: ""  # when empty, configmap.yaml uses redis://<release-name>-redis-vector-db:6379
+INDEX_NAME: "rag-redis"
+
+image:
+  repository: opea/retriever-redis
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+port: 7000  # container port (deployment.yaml) and Service targetPort (service.yaml)
+service:
+  type: ClusterIP
+  # Port exposed by the Service; the default for the retriever service is 7000
+  port: 7000
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: retriever-usvc  # named container port from deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 failures x 5s period = 120s of consecutive failures tolerated
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: retriever-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: retriever-usvc
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures x 5s period = up to 600s allowed for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# milvus DB configurations (see milvus-values.yaml for a milvus-enabled example)
+MILVUS_HOST: ""  # when empty, configmap.yaml sets MILVUS_HOST to <release-name>-milvus
+MILVUS_PORT: ""
+COLLECTION_NAME: ""  # only exported to the ConfigMap when non-empty
+MOSEC_EMBEDDING_ENDPOINT: ""  # when set, takes precedence over TEI_EMBEDDING_ENDPOINT in configmap.yaml
+MOSEC_EMBEDDING_MODEL: ""
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/speecht5/.helmignore b/helm-charts/speecht5/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/speecht5/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/speecht5/Chart.yaml b/helm-charts/speecht5/Chart.yaml
new file mode 100644
index 000000000..e4e1e3ff8
--- /dev/null
+++ b/helm-charts/speecht5/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: speecht5
+description: The Helm chart for deploying speecht5 as microservice
+type: application
+version: 0-latest
+# The speecht5 microservice server version
+appVersion: "v1.0"
diff --git a/helm-charts/speecht5/README.md b/helm-charts/speecht5/README.md
new file mode 100644
index 000000000..0a8b71165
--- /dev/null
+++ b/helm-charts/speecht5/README.md
@@ -0,0 +1,31 @@
+# speecht5
+
+Helm chart for deploying speecht5 service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+export MODELDIR=/mnt/opea-models
+helm install speecht5 speecht5 --set global.modelUseHostPath=${MODELDIR}
+```
+
+## Verify
+
+Use port-forward to access it from localhost.
+
+```console
+kubectl port-forward service/speecht5 1234:7055 &
+curl http://localhost:1234/v1/tts \
+  -XPOST \
+  -d '{"text": "Who are you?"}' \
+  -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key              | Type   | Default           | Description |
+| ---------------- | ------ | ----------------- | ----------- |
+| image.repository | string | `"opea/speecht5"` |             |
+| service.port     | int    | `7055`            |             |
diff --git a/helm-charts/speecht5/ci-gaudi-values.yaml b/helm-charts/speecht5/ci-gaudi-values.yaml
new file mode 120000
index 000000000..7243d31b2
--- /dev/null
+++ b/helm-charts/speecht5/ci-gaudi-values.yaml
@@ -0,0 +1 @@
+gaudi-values.yaml
\ No newline at end of file
diff --git a/helm-charts/speecht5/ci-values.yaml b/helm-charts/speecht5/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/speecht5/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/speecht5/gaudi-values.yaml b/helm-charts/speecht5/gaudi-values.yaml
new file mode 100644
index 000000000..aefd9f373
--- /dev/null
+++ b/helm-charts/speecht5/gaudi-values.yaml
@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Gaudi value overrides for speecht5.
+# This is a YAML-formatted file.
+# Selects the Gaudi image and requests one habana.ai/gaudi device for the container.
+
+image:
+  repository: opea/speecht5-gaudi
+  tag: "latest"
+
+resources:
+  limits:
+    habana.ai/gaudi: 1
diff --git a/helm-charts/speecht5/templates/_helpers.tpl b/helm-charts/speecht5/templates/_helpers.tpl
new file mode 100644
index 000000000..7db4f3fd2
--- /dev/null
+++ b/helm-charts/speecht5/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "speecht5.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "speecht5.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "speecht5.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "speecht5.labels" -}}
+helm.sh/chart: {{ include "speecht5.chart" . }}
+{{ include "speecht5.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "speecht5.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "speecht5.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/* Resolve the service account name: serviceAccount.name if set, else the chart
+fullname when serviceAccount.create is true, else "default". NOTE(review): values.yaml
+defines no serviceAccount key and no template in this chart includes this helper. */}}
+{{- define "speecht5.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "speecht5.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/speecht5/templates/configmap.yaml b/helm-charts/speecht5/templates/configmap.yaml
new file mode 100644
index 000000000..d488542fb
--- /dev/null
+++ b/helm-charts/speecht5/templates/configmap.yaml
@@ -0,0 +1,20 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "speecht5.fullname" . }}-config  # loaded through envFrom in deployment.yaml
+  labels:
+    {{- include "speecht5.labels" . | nindent 4 }}
+data:
+  EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"  # NOTE(review): EasyOCR path in a TTS chart - confirm it is needed
+  # TTS_MODEL_PATH: {{ .Values.TTS_MODEL_PATH | quote }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  HF_HOME: "/tmp/.cache/huggingface"  # under the tmp emptyDir mounted at /tmp
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  HUGGINGFACE_HUB_CACHE: "/data"  # mount point of model-volume in deployment.yaml
diff --git a/helm-charts/speecht5/templates/deployment.yaml b/helm-charts/speecht5/templates/deployment.yaml
new file mode 100644
index 000000000..6b5976b84
--- /dev/null
+++ b/helm-charts/speecht5/templates/deployment.yaml
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "speecht5.fullname" . }}
+  labels:
+    {{- include "speecht5.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "speecht5.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "speecht5.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}  # chart name keeps the container name independent of the release name
+          envFrom:
+            - configMapRef:
+                name: {{ include "speecht5.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:  # rendered as {} when global.modelUseHostPath is set
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: speecht5  # port name used by the probes in values.yaml
+              containerPort: 7055  # must match targetPort in service.yaml
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /data  # HUGGINGFACE_HUB_CACHE in configmap.yaml
+              name: model-volume
+            - mountPath: /tmp  # HF_HOME in configmap.yaml lives under /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: model-volume
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "speecht5.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/speecht5/templates/service.yaml b/helm-charts/speecht5/templates/service.yaml
new file mode 100644
index 000000000..393766c47
--- /dev/null
+++ b/helm-charts/speecht5/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "speecht5.fullname" . }}  # host name used by tests/test-pod.yaml
+  labels:
+    {{- include "speecht5.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 7055  # containerPort of the speecht5 container in deployment.yaml
+      protocol: TCP
+      name: speecht5
+  selector:  # same selector labels as the Deployment pod template
+    {{- include "speecht5.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/speecht5/templates/tests/test-pod.yaml b/helm-charts/speecht5/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..fd86ea9ae
--- /dev/null
+++ b/helm-charts/speecht5/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "speecht5.fullname" . }}-testpod"
+  labels:
+    {{- include "speecht5.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "speecht5.fullname" . }}:{{ .Values.service.port }}/v1/tts \
+            -X POST \
+            -d '{"text": "Who are you?"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/speecht5/values.yaml b/helm-charts/speecht5/values.yaml
new file mode 100644
index 000000000..96a8e452a
--- /dev/null
+++ b/helm-charts/speecht5/values.yaml
@@ -0,0 +1,97 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for speecht5.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+TTS_MODEL_PATH: "microsoft/speecht5_tts"  # NOTE(review): configmap.yaml only references this in a commented-out entry, so it is not passed to the pod
+# VOCODE_MODEL: "microsoft/speecht5_hifigan"
+
+image:
+  repository: opea/speecht5
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:  # container-level; deployment.yaml renders {} instead when global.modelUseHostPath is set
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for speecht5 service is 7055
+  port: 7055
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:  # each probe is rendered by deployment.yaml only when its value is non-empty
+  httpGet:
+    path: /v1/health
+    port: speecht5  # named containerPort declared in deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 failures x 5s period = about 2 minutes
+readinessProbe:
+  httpGet:
+    path: /v1/health
+    port: speecht5
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /v1/health
+    port: speecht5
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures x 5s period = about 10 minutes for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Choose where to save your downloaded models.
+  # Set modelUseHostPath to a local directory; this is good for a single-node test. Example:
+  # modelUseHostPath: /mnt/opea-models
+  # Set modelUsePVC to a PersistentVolumeClaim (PVC) name, which is suitable for multi-node deployment. Example:
+  # modelUsePVC: model-volume
+  # Set only one of the following variables; the behavior is not defined if both are set.
+  # By default both are empty, and the model is downloaded and saved to a temporary (emptyDir) volume.
+  modelUseHostPath: ""
+  modelUsePVC: ""
diff --git a/helm-charts/tei/.helmignore b/helm-charts/tei/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/tei/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/tei/Chart.yaml b/helm-charts/tei/Chart.yaml
new file mode 100644
index 000000000..42e81c036
--- /dev/null
+++ b/helm-charts/tei/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: tei
+description: The Helm chart for HuggingFace Text Embedding Inference Server
+type: application
+version: 0-latest
+# The HF TEI version
+appVersion: "cpu-1.5"
diff --git a/helm-charts/tei/README.md b/helm-charts/tei/README.md
new file mode 100644
index 000000000..b21c2eb25
--- /dev/null
+++ b/helm-charts/tei/README.md
@@ -0,0 +1,45 @@
+# tei
+
+Helm chart for deploying Hugging Face Text Embeddings Inference service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+cd ${GenAIInfra_repo}/helm-charts/common
+export MODELDIR=/mnt/opea-models
+export MODELNAME="BAAI/bge-base-en-v1.5"
+helm install tei tei --set global.modelUseHostPath=${MODELDIR} --set EMBEDDING_MODEL_ID=${MODELNAME}
+```
+
+By default, the tei service will download the "BAAI/bge-base-en-v1.5" model, which is about 1.1GB.
+
+If you already cached the model locally, you can pass it to container like this example:
+
+MODELDIR=/mnt/opea-models
+
+MODELNAME="/data/BAAI/bge-base-en-v1.5"
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/tei 2081:80` to expose the tei service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"}' -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                     | Type   | Default                                           | Description                                                                                                                                                                                                           |
+| ----------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| EMBEDDING_MODEL_ID      | string | `"BAAI/bge-base-en-v1.5"`                         | Models id from https://huggingface.co/, or predownloaded model directory                                                                                                                                              |
+| global.modelUseHostPath | string | `"/mnt/opea-models"`                              | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
+| image.repository        | string | `"ghcr.io/huggingface/text-embeddings-inference"` |                                                                                                                                                                                                                       |
+| image.tag               | string | `"cpu-1.5"`                                       |                                                                                                                                                                                                                       |
+| autoscaling.enabled     | bool   | `false`                                           | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling!                                                                                 |
+| global.monitoring       | bool   | `false`                                           | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling!                                                                                           |
diff --git a/helm-charts/tei/ci-gaudi-values.yaml b/helm-charts/tei/ci-gaudi-values.yaml
new file mode 120000
index 000000000..7243d31b2
--- /dev/null
+++ b/helm-charts/tei/ci-gaudi-values.yaml
@@ -0,0 +1 @@
+gaudi-values.yaml
\ No newline at end of file
diff --git a/helm-charts/tei/ci-values.yaml b/helm-charts/tei/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/tei/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/tei/gaudi-values.yaml b/helm-charts/tei/gaudi-values.yaml
new file mode 100644
index 000000000..45627caf3
--- /dev/null
+++ b/helm-charts/tei/gaudi-values.yaml
@@ -0,0 +1,26 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Gaudi value overrides for tei.
+# This is a YAML-formatted file.
+# Selects the Gaudi image, requests one habana.ai/gaudi device and sets HPU-related options.
+
+accelDevice: "gaudi"  # non-empty: HPA uses the queue-size metric and the CPU-only terminationGracePeriodSeconds is skipped
+
+OMPI_MCA_btl_vader_single_copy_mechanism: "none"  # exported by configmap.yaml only when set
+MAX_WARMUP_SEQUENCE_LENGTH: "512"  # exported by configmap.yaml only when set
+image:
+  repository: ghcr.io/huggingface/tei-gaudi
+  tag: 1.5.0
+
+securityContext:
+  readOnlyRootFilesystem: false
+
+resources:
+  limits:
+    habana.ai/gaudi: 1
+
+livenessProbe:
+  timeoutSeconds: 1
+readinessProbe:
+  timeoutSeconds: 1
diff --git a/helm-charts/tei/templates/_helpers.tpl b/helm-charts/tei/templates/_helpers.tpl
new file mode 100644
index 000000000..fc4a57438
--- /dev/null
+++ b/helm-charts/tei/templates/_helpers.tpl
@@ -0,0 +1,69 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "tei.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "tei.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "tei.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Convert the chart fullname to a metric prefix: "-" becomes "_", then the first match of [a-zA-Z_:][a-zA-Z0-9_:]* is kept
+*/}}
+{{- define "tei.metricPrefix" -}}
+{{- include "tei.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "tei.labels" -}}
+helm.sh/chart: {{ include "tei.chart" . }}
+{{ include "tei.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "tei.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "tei.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/* Resolve the service account name: serviceAccount.name if set, else the chart
+fullname when serviceAccount.create is true, else "default". NOTE(review): no template
+in this chart includes this helper - confirm values.yaml defines serviceAccount before use. */}}
+{{- define "tei.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "tei.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/tei/templates/configmap.yaml b/helm-charts/tei/templates/configmap.yaml
new file mode 100644
index 000000000..6b40613a1
--- /dev/null
+++ b/helm-charts/tei/templates/configmap.yaml
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "tei.fullname" . }}-config  # loaded through envFrom in deployment.yaml
+  labels:
+    {{- include "tei.labels" . | nindent 4 }}
+data:
+  MODEL_ID: {{ .Values.EMBEDDING_MODEL_ID | quote }}
+  PORT: {{ .Values.port | quote }}  # same value as containerPort in deployment.yaml and targetPort in service.yaml
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  NUMBA_CACHE_DIR: "/tmp"  # /tmp is an emptyDir mount in deployment.yaml
+  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
+  HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
+  {{- end }}
+  {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
+  HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
+  {{- end }}
+  # More options for HPU; each entry is emitted only when the value is set (see gaudi-values.yaml)
+  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
+  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
+  {{- end }}
+  {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
+  MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
+  {{- end }}
diff --git a/helm-charts/tei/templates/deployment.yaml b/helm-charts/tei/templates/deployment.yaml
new file mode 100644
index 000000000..442847df5
--- /dev/null
+++ b/helm-charts/tei/templates/deployment.yaml
@@ -0,0 +1,122 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "tei.fullname" . }}
+  labels:
+    {{- include "tei.labels" . | nindent 4 }}
+spec:
+  {{- if ne (int .Values.replicaCount) 1 }}
+  # Emitted only when replicaCount is not 1. Remove this field if the replica count should not be reset on pod update with HPA.
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "tei.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "tei.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "tei.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          args:
+            - "--auto-truncate"
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: model-volume
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: {{ .Values.shmSize }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if not .Values.accelDevice }}
+      # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
+      terminationGracePeriodSeconds: 60
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "tei.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml b/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml
new file mode 100644
index 000000000..9b76e6ad4
--- /dev/null
+++ b/helm-charts/tei/templates/horizontal-pod-autoscaler.yaml
@@ -0,0 +1,60 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if and .Values.global.monitoring .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "tei.fullname" . }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "tei.fullname" . }}
+  minReplicas: 1
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+  - type: Object
+    object:
+      describedObject:
+        apiVersion: v1
+        # get metric for named object of given type (in same namespace)
+        kind: Service
+        name: {{ include "tei.fullname" . }}
+      target:
+{{- if .Values.accelDevice }}
+        # Metric is sum from all pods. "AverageValue" divides value returned from
+        # the custom metrics API by the number of Pods before comparing to the target:
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics
+        type: AverageValue
+        averageValue: 15
+      metric:
+        name: {{ include "tei.metricPrefix" . }}_queue_size_sum
+{{- else }}
+        # Metric is average for all the pods. To avoid replica fluctuation when pod
+        # startup + request processing takes longer than HPA evaluation period, this uses
+        # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type.
+        type: Value
+        value: 4 # seconds
+      metric:
+        name: {{ include "tei.metricPrefix" . }}_request_latency
+{{- end }}
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 180
+      policies:
+      - type: Percent
+        value: 25
+        periodSeconds: 15
+    scaleUp:
+      selectPolicy: Max
+      stabilizationWindowSeconds: 0
+      policies:
+      - type: Percent
+        value: 50
+        periodSeconds: 15
+      - type: Pods
+        value: 2
+        periodSeconds: 15
+{{- end }}
diff --git a/helm-charts/tei/templates/service.yaml b/helm-charts/tei/templates/service.yaml
new file mode 100644
index 000000000..a12a01dec
--- /dev/null
+++ b/helm-charts/tei/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "tei.fullname" . }}  # host name used by tests/test-pod.yaml
+  labels:
+    {{- include "tei.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: 80  # fixed; tests/test-pod.yaml and the README port-forward example use port 80
+      targetPort: {{ .Values.port }}
+      protocol: TCP
+      name: tei  # referenced by endpoints.port in servicemonitor.yaml
+  selector:
+    {{- include "tei.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/tei/templates/servicemonitor.yaml b/helm-charts/tei/templates/servicemonitor.yaml
new file mode 100644
index 000000000..96743442b
--- /dev/null
+++ b/helm-charts/tei/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor  # rendered only when global.monitoring is true
+metadata:
+  name: {{ include "tei.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name; presumably used by Prometheus to discover this monitor - confirm
+spec:
+  selector:
+    matchLabels:
+      {{- include "tei.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: tei  # named port defined in service.yaml
+    interval: 5s  # scrape every 5 seconds
+{{- end }}
diff --git a/helm-charts/tei/templates/tests/test-pod.yaml b/helm-charts/tei/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..700469beb
--- /dev/null
+++ b/helm-charts/tei/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "tei.fullname" . }}-testpod"
+  labels:
+    {{- include "tei.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # pod is created by `helm test`, not on install
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # NOTE(review): assumed to provide bash and a curl supporting --fail-with-body - confirm
+      command: ['bash', '-c']  # bash is required: the script uses (( )) and [[ ]]
+      args:  # POST to /embed; retry (max 20 times, 10s apart) only while curl exits with code 7 (connect failed), fail on anything else
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "tei.fullname" . }}/embed -sS --fail-with-body \
+            -X POST \
+            -d '{"inputs":"What is Deep Learning?"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # run the script once; the container is not restarted after it exits
diff --git a/helm-charts/tei/values.yaml b/helm-charts/tei/values.yaml
new file mode 100644
index 000000000..44460a359
--- /dev/null
+++ b/helm-charts/tei/values.yaml
@@ -0,0 +1,117 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for tei.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+# Enabling HPA will:
+# - Ignore above replica count, as it will be controlled by HPA
+# - Add example HPA scaling rules with thresholds suitable for Xeon deployments
+# - Require custom metrics ConfigMap available in the main application chart
+autoscaling:
+  maxReplicas: 2
+  enabled: false
+
+port: 2081
+shmSize: 1Gi
+EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
+HF_HUB_DISABLE_PROGRESS_BARS: "1"
+HF_HUB_ENABLE_HF_TRANSFER: "0"
+
+image:
+  repository: ghcr.io/huggingface/text-embeddings-inference
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "cpu-1.5"
+
+# empty for CPU
+accelDevice: ""
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+  timeoutSeconds: 2
+readinessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  timeoutSeconds: 2
+startupProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Choose where to save your downloaded models
+  # Set modelUseHostPath for local directory, this is good for one node test. Example:
+  # modelUseHostPath: /mnt/opea-models
+  # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example:
+  # modelUsePVC: model-volume
+  # Set only one of the following variables; the behavior is not defined if both are set.
+  # By default, both variables are empty and the model is downloaded and saved to a tmp volume.
+  modelUseHostPath: ""
+  modelUsePVC: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/teirerank/.helmignore b/helm-charts/teirerank/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/teirerank/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/teirerank/Chart.yaml b/helm-charts/teirerank/Chart.yaml
new file mode 100644
index 000000000..cc9c015f9
--- /dev/null
+++ b/helm-charts/teirerank/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: teirerank
+description: The Helm chart for HuggingFace Text Embedding Inference Server
+type: application
+version: 0-latest
+# The HF TEI version
+appVersion: "cpu-1.5"
diff --git a/helm-charts/teirerank/README.md b/helm-charts/teirerank/README.md
new file mode 100644
index 000000000..79202497d
--- /dev/null
+++ b/helm-charts/teirerank/README.md
@@ -0,0 +1,48 @@
+# teirerank
+
+Helm chart for deploying Hugging Face Text Embeddings Inference (TEI) reranking service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+cd ${GenAIInfra_repo}/helm-charts/common
+export MODELDIR=/mnt/opea-models
+export MODELNAME="BAAI/bge-reranker-base"
+helm install teirerank teirerank --set global.modelUseHostPath=${MODELDIR} --set RERANK_MODEL_ID=${MODELNAME}
+```
+
+By default, the teirerank service will download the "BAAI/bge-reranker-base" model, which is about 1.1GB.
+
+If you already cached the model locally, you can pass it to container like this example:
+
+MODELDIR=/mnt/opea-models
+
+MODELNAME="/data/BAAI/bge-reranker-base"
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/teirerank 2082:80` to expose the teirerank service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:2082/rerank \
+    -X POST \
+    -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                     | Type   | Default                                           | Description                                                                                                                                                                                                                 |
+| ----------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| RERANK_MODEL_ID         | string | `"BAAI/bge-reranker-base"`                        | Models id from https://huggingface.co/, or predownloaded model directory                                                                                                                                                    |
+| global.modelUseHostPath | string | `""`                                              | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
+| image.repository        | string | `"ghcr.io/huggingface/text-embeddings-inference"` |                                                                                                                                                                                                                             |
+| image.tag               | string | `"cpu-1.5"`                                       |                                                                                                                                                                                                                             |
+| autoscaling.enabled     | bool   | `false`                                           | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling!                                                                                       |
+| global.monitoring       | bool   | `false`                                           | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling!                                                                                                 |
diff --git a/helm-charts/teirerank/ci-values.yaml b/helm-charts/teirerank/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/teirerank/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/teirerank/gaudi-values.yaml b/helm-charts/teirerank/gaudi-values.yaml
new file mode 100644
index 000000000..a5d6c1b14
--- /dev/null
+++ b/helm-charts/teirerank/gaudi-values.yaml
@@ -0,0 +1,26 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Gaudi (HPU) override values for teirerank, applied on top of values.yaml.
+# This is a YAML-formatted file.
+# Pass it to helm with: --values gaudi-values.yaml
+
+accelDevice: "gaudi"  # non-empty: HPA scales on the queue-size metric and the CPU-only termination grace period is dropped
+
+OMPI_MCA_btl_vader_single_copy_mechanism: "none"  # exported through the ConfigMap only when set
+MAX_WARMUP_SEQUENCE_LENGTH: "512"  # exported through the ConfigMap only when set
+image:
+  repository: ghcr.io/huggingface/tei-gaudi
+  tag: "1.5.0"  # quoted like every other image tag so a version string can never be re-typed as a number
+
+securityContext:
+  readOnlyRootFilesystem: false
+
+resources:
+  limits:
+    habana.ai/gaudi: 1
+
+livenessProbe:
+  timeoutSeconds: 1
+readinessProbe:
+  timeoutSeconds: 1
diff --git a/helm-charts/teirerank/templates/_helpers.tpl b/helm-charts/teirerank/templates/_helpers.tpl
new file mode 100644
index 000000000..0c0b9238f
--- /dev/null
+++ b/helm-charts/teirerank/templates/_helpers.tpl
@@ -0,0 +1,69 @@
+{{/*
+Expand the name of the chart: .Values.nameOverride when set, otherwise .Chart.Name; cut to 63 chars with a trailing "-" removed.
+*/}}
+{{- define "teirerank.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name, truncated at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+Precedence: .Values.fullnameOverride; else the release name alone when it already contains the chart name (or nameOverride);
+else "<release name>-<chart name or nameOverride>". A trailing "-" left by truncation is removed in every case.
+*/}}
+{{- define "teirerank.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label: "<name>-<version>" with "+" replaced by "_", cut to 63 chars, trailing "-" removed.
+*/}}
+{{- define "teirerank.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Convert the full app name (teirerank.fullname) to a metric prefix: "-" becomes "_", then the first match of [a-zA-Z_:][a-zA-Z0-9_:]* is kept.
+*/}}
+{{- define "teirerank.metricPrefix" -}}
+{{- include "teirerank.fullname" . | replace "-" "_" | regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" }}
+{{- end }}
+
+{{/*
+Common labels: chart name/version, the selector labels, the app version (only when .Chart.AppVersion is set) and the managing service.
+*/}}
+{{- define "teirerank.labels" -}}
+helm.sh/chart: {{ include "teirerank.chart" . }}
+{{ include "teirerank.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (name + release instance); shared by the Deployment selector and pod template, the Service selector, the ServiceMonitor and the topology spread constraint.
+*/}}
+{{- define "teirerank.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "teirerank.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use. A missing .Values.serviceAccount map (values.yaml defines none) is treated as empty, giving "default", instead of a nil-pointer template error.
+*/}}
+{{- define "teirerank.serviceAccountName" -}}
+{{- if (.Values.serviceAccount | default dict).create }}
+{{- default (include "teirerank.fullname" .) (.Values.serviceAccount | default dict).name }}
+{{- else }}
+{{- default "default" (.Values.serviceAccount | default dict).name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/teirerank/templates/configmap.yaml b/helm-charts/teirerank/templates/configmap.yaml
new file mode 100644
index 000000000..f8c25bc0b
--- /dev/null
+++ b/helm-charts/teirerank/templates/configmap.yaml
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "teirerank.fullname" . }}-config  # loaded into the container through envFrom in deployment.yaml
+  labels:
+    {{- include "teirerank.labels" . | nindent 4 }}
+data:
+  MODEL_ID: {{ .Values.RERANK_MODEL_ID | quote }}
+  PORT: {{ .Values.port | quote }}  # same value as the containerPort / Service targetPort
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  NUMBA_CACHE_DIR: "/tmp"  # cache paths point under /tmp, which deployment.yaml backs with an emptyDir volume
+  TRANSFORMERS_CACHE: "/tmp/transformers_cache"
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
+  HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
+  {{- end }}
+  {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
+  HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
+  {{- end }}
+  # More options for HPU (set in gaudi-values.yaml; each key is omitted from the ConfigMap when its value is unset)
+  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
+  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
+  {{- end }}
+  {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }}
+  MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }}
+  {{- end }}
diff --git a/helm-charts/teirerank/templates/deployment.yaml b/helm-charts/teirerank/templates/deployment.yaml
new file mode 100644
index 000000000..2d0ec9992
--- /dev/null
+++ b/helm-charts/teirerank/templates/deployment.yaml
@@ -0,0 +1,122 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "teirerank.fullname" . }}
+  labels:
+    {{- include "teirerank.labels" . | nindent 4 }}
+spec:
+  {{- if ne (int .Values.replicaCount) 1 }}
+  # replicas is emitted only when replicaCount is not 1; remove if replica count should not be reset on pod update with HPA
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "teirerank.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "teirerank.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "teirerank.fullname" . }}-config  # ConfigMap from configmap.yaml
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:  # emptied when the model dir is a hostPath (presumably so the container can write to the host directory - confirm)
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          args:
+            - "--auto-truncate"
+          volumeMounts:
+            - mountPath: /data  # model directory (PVC, hostPath or emptyDir, see volumes below)
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp  # writable scratch space for the cache paths set in configmap.yaml
+              name: tmp
+          ports:
+            - name: http  # port name used by the probes in values.yaml
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: model-volume  # modelUsePVC wins over modelUseHostPath when both are set; emptyDir when neither is
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory  # the directory must already exist on the node
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: {{ .Values.shmSize }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if not .Values.accelDevice }}
+      # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
+      terminationGracePeriodSeconds: 60
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname  # spread pods across nodes
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "teirerank.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml b/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml
new file mode 100644
index 000000000..b8a881934
--- /dev/null
+++ b/helm-charts/teirerank/templates/horizontal-pod-autoscaler.yaml
@@ -0,0 +1,60 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if and .Values.global.monitoring .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler  # rendered only when both global.monitoring and autoscaling.enabled are true
+metadata:
+  name: {{ include "teirerank.fullname" . }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "teirerank.fullname" . }}  # the Deployment from deployment.yaml
+  minReplicas: 1
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}  # 3 by default (values.yaml)
+  metrics:
+  - type: Object
+    object:
+      describedObject:
+        apiVersion: v1
+        # get metric for named object of given type (in same namespace)
+        kind: Service
+        name: {{ include "teirerank.fullname" . }}
+      target:
+{{- if .Values.accelDevice }}
+        # Metric is sum from all pods. "AverageValue" divides value returned from
+        # the custom metrics API by the number of Pods before comparing to the target:
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics
+        type: AverageValue
+        averageValue: 15
+      metric:
+        name: {{ include "teirerank.metricPrefix" . }}_queue_size_sum
+{{- else }}
+        # Metric is average for all the pods. To avoid replica fluctuation when pod
+        # startup + request processing takes longer than HPA evaluation period, this uses
+        # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type.
+        type: Value
+        value: 4 # seconds
+      metric:
+        name: {{ include "teirerank.metricPrefix" . }}_request_latency
+{{- end }}
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 180  # act on the highest recommendation of the last 3 minutes before removing pods
+      policies:
+      - type: Percent  # remove at most 25% of the current replicas per 15s period
+        value: 25
+        periodSeconds: 15
+    scaleUp:
+      selectPolicy: Max  # of the policies below, apply the one allowing the larger change
+      stabilizationWindowSeconds: 0  # scale up immediately
+      policies:
+      - type: Percent  # add up to 50% of the current replicas per 15s period
+        value: 50
+        periodSeconds: 15
+      - type: Pods  # or add up to 2 pods per 15s period
+        value: 2
+        periodSeconds: 15
+{{- end }}
diff --git a/helm-charts/teirerank/templates/service.yaml b/helm-charts/teirerank/templates/service.yaml
new file mode 100644
index 000000000..a3616b6b0
--- /dev/null
+++ b/helm-charts/teirerank/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service  # also the object the HPA reads its custom metric from
+metadata:
+  name: {{ include "teirerank.fullname" . }}
+  labels:
+    {{- include "teirerank.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}  # ClusterIP unless overridden (values.yaml)
+  ports:
+    - port: 80  # fixed Service port; the Helm test pod calls the service without an explicit port
+      targetPort: {{ .Values.port }}  # same value as the containerPort in deployment.yaml (2082 by default)
+      protocol: TCP
+      name: teirerank  # servicemonitor.yaml selects this port by name
+  selector:
+    {{- include "teirerank.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/teirerank/templates/servicemonitor.yaml b/helm-charts/teirerank/templates/servicemonitor.yaml
new file mode 100644
index 000000000..13110adbe
--- /dev/null
+++ b/helm-charts/teirerank/templates/servicemonitor.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor  # rendered only when global.monitoring is true
+metadata:
+  name: {{ include "teirerank.fullname" . }}
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # Prometheus Helm release name; presumably used by Prometheus to discover this monitor - confirm
+spec:
+  selector:
+    matchLabels:
+      {{- include "teirerank.selectorLabels" . | nindent 6 }}
+  endpoints:
+  - port: teirerank  # named port defined in service.yaml
+    interval: 5s  # scrape every 5 seconds
+{{- end }}
diff --git a/helm-charts/teirerank/templates/tests/test-pod.yaml b/helm-charts/teirerank/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..225fe2f58
--- /dev/null
+++ b/helm-charts/teirerank/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "teirerank.fullname" . }}-testpod"
+  labels:
+    {{- include "teirerank.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # pod is created by `helm test`, not on install
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # NOTE(review): assumed to provide bash and a curl supporting --fail-with-body - confirm
+      command: ['bash', '-c']  # bash is required: the script uses (( )) and [[ ]]
+      args:  # POST to /rerank; retry (max 20 times, 10s apart) only while curl exits with code 7 (connect failed), fail on anything else
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "teirerank.fullname" . }}/rerank -sS --fail-with-body \
+            -X POST \
+            -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # run the script once; the container is not restarted after it exits
diff --git a/helm-charts/teirerank/values.yaml b/helm-charts/teirerank/values.yaml
new file mode 100644
index 000000000..4d0dedf29
--- /dev/null
+++ b/helm-charts/teirerank/values.yaml
@@ -0,0 +1,117 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for teirerank.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+# Enabling HPA will:
+# - Ignore above replica count, as it will be controlled by HPA
+# - Add example HPA scaling rules with thresholds suitable for Xeon deployments
+# - Require custom metrics ConfigMap available in the main application chart
+autoscaling:
+  maxReplicas: 3
+  enabled: false
+
+port: 2082
+shmSize: 1Gi
+RERANK_MODEL_ID: "BAAI/bge-reranker-base"
+HF_HUB_DISABLE_PROGRESS_BARS: "1"
+HF_HUB_ENABLE_HF_TRANSFER: "0"
+
+image:
+  repository: ghcr.io/huggingface/text-embeddings-inference
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "cpu-1.5"
+
+# empty for CPU
+accelDevice: ""
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 8
+  periodSeconds: 8
+  timeoutSeconds: 4
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 8
+  periodSeconds: 8
+  timeoutSeconds: 4
+startupProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Choose where to save your downloaded models
+  # Set modelUseHostPath for local directory, this is good for one node test. Example:
+  # modelUseHostPath: /mnt/opea-models
+  # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example:
+  # modelUsePVC: model-volume
+  # Set only one of the following variables; the behavior is not defined if both are set.
+  # By default, both variables are empty and the model is downloaded and saved to a tmp volume.
+  modelUseHostPath: ""
+  modelUsePVC: ""
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Prometheus Helm install release name for serviceMonitor
+  prometheusRelease: prometheus-stack
diff --git a/helm-charts/tgi/.helmignore b/helm-charts/tgi/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/tgi/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/tgi/Chart.yaml b/helm-charts/tgi/Chart.yaml
new file mode 100644
index 000000000..9836a198b
--- /dev/null
+++ b/helm-charts/tgi/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: tgi
+description: The Helm chart for HuggingFace Text Generation Inference Server
+type: application
+version: 0-latest
+# The HF TGI version
+appVersion: "2.1.0"
diff --git a/helm-charts/tgi/README.md b/helm-charts/tgi/README.md
new file mode 100644
index 000000000..27acd96e8
--- /dev/null
+++ b/helm-charts/tgi/README.md
@@ -0,0 +1,52 @@
+# tgi
+
+Helm chart for deploying Hugging Face Text Generation Inference service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common
+export MODELDIR=/mnt/opea-models
+export MODELNAME="bigscience/bloom-560m"
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
+# To deploy on Gaudi enabled kubernetes cluster
+# helm install tgi tgi --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml
+```
+
+By default, the tgi service will download the "bigscience/bloom-560m" model, which is about 1.1GB.
+
+If you already cached the model locally, you can pass it to container like this example:
+
+MODELDIR=/mnt/opea-models
+
+MODELNAME="/data/models--bigscience--bloom-560m"
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/tgi 2080:80` to expose the tgi service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:2080/generate \
+    -X POST \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+    -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                             | Type   | Default                                           | Description                                                                                                                                                                                                           |
+| ------------------------------- | ------ | ------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| LLM_MODEL_ID                    | string | `"bigscience/bloom-560m"`                         | Models id from https://huggingface.co/, or predownloaded model directory                                                                                                                                              |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here`              | Hugging Face API token                                                                                                                                                                                                |
+| global.modelUseHostPath         | string | `"/mnt/opea-models"`                              | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
+| image.repository                | string | `"ghcr.io/huggingface/text-generation-inference"` |                                                                                                                                                                                                                       |
+| image.tag                       | string | `"1.4"`                                           |                                                                                                                                                                                                                       |
+| autoscaling.enabled             | bool   | `false`                                           | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling!                                                                                 |
+| global.monitoring               | bool   | `false`                                           | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling!                                                                                           |
diff --git a/helm-charts/tgi/ci-gaudi-values.yaml b/helm-charts/tgi/ci-gaudi-values.yaml
new file mode 120000
index 000000000..7243d31b2
--- /dev/null
+++ b/helm-charts/tgi/ci-gaudi-values.yaml
@@ -0,0 +1 @@
+gaudi-values.yaml
\ No newline at end of file
diff --git a/helm-charts/tgi/ci-values.yaml b/helm-charts/tgi/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/tgi/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/tgi/gaudi-values.yaml b/helm-charts/tgi/gaudi-values.yaml
new file mode 100644
index 000000000..9c46415cc
--- /dev/null
+++ b/helm-charts/tgi/gaudi-values.yaml
@@ -0,0 +1,39 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Gaudi (HPU) overrides for the tgi chart.
+# This is a YAML-formatted file.
+# Layer it on top of values.yaml, e.g. with `-f gaudi-values.yaml`.
+
+accelDevice: 'gaudi'
+
+image:
+  tag: '2.0.6'
+  repository: 'ghcr.io/huggingface/tgi-gaudi'
+
+resources:
+  limits:
+    habana.ai/gaudi: 1
+
+CUDA_GRAPHS: ''  # empty, so the ConfigMap template leaves this key out
+MAX_TOTAL_TOKENS: '2048'
+MAX_INPUT_LENGTH: '1024'
+OMPI_MCA_btl_vader_single_copy_mechanism: 'none'
+FLASH_ATTENTION_RECOMPUTE: 'true'
+USE_FLASH_ATTENTION: 'true'
+LIMIT_HPU_GRAPH: 'true'
+ENABLE_HPU_GRAPH: 'true'
+
+startupProbe:  # timing fields only; no probe handler is declared in this file
+  failureThreshold: 120
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
+readinessProbe:
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
+livenessProbe:
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
diff --git a/helm-charts/tgi/nv-values.yaml b/helm-charts/tgi/nv-values.yaml
new file mode 100644
index 000000000..d073ffb9c
--- /dev/null
+++ b/helm-charts/tgi/nv-values.yaml
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# NVIDIA GPU overrides for the tgi chart.
+# This is a YAML-formatted file.
+# Layer it on top of values.yaml, e.g. with `-f nv-values.yaml`.
+
+accelDevice: 'nvidia'
+
+image:
+  tag: '2.2.0'
+  repository: 'ghcr.io/huggingface/text-generation-inference'
+
+CUDA_GRAPHS: ''  # empty, so the ConfigMap template leaves this key out
+
+resources:
+  limits:
+    nvidia.com/gpu: 1
+
+startupProbe:  # timing fields only; no probe handler is declared in this file
+  failureThreshold: 120
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
+readinessProbe:
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
+livenessProbe:
+  timeoutSeconds: 1
+  periodSeconds: 5
+  initialDelaySeconds: 5
diff --git a/helm-charts/tgi/templates/_helpers.tpl b/helm-charts/tgi/templates/_helpers.tpl
new file mode 100644
index 000000000..b672e8309
--- /dev/null
+++ b/helm-charts/tgi/templates/_helpers.tpl
@@ -0,0 +1,69 @@
+{{/*
+Chart name (or .Values.nameOverride when set), cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "tgi.name" -}}
+{{- trimSuffix "-" (trunc 63 (default .Chart.Name .Values.nameOverride)) }}
+{{- end }}
+
+{{/*
+Fully qualified app name, capped at 63 chars because some Kubernetes name
+fields are limited to this (by the DNS naming spec).
+Resolution order: .Values.fullnameOverride, then the release name alone when
+it already contains the chart name, otherwise "<release>-<chart name>".
+A trailing "-" left after the truncation is removed.
+*/}}
+{{- define "tgi.fullname" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $full := printf "%s-%s" .Release.Name $base }}
+{{- if .Values.fullnameOverride }}
+{{- $full = .Values.fullnameOverride }}
+{{- else if contains $base .Release.Name }}
+{{- $full = .Release.Name }}
+{{- end }}
+{{- trunc 63 $full | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Value of the chart label: "<chart name>-<chart version>" with "+" replaced by "_", capped at 63 chars.
+*/}}
+{{- define "tgi.chart" -}}
+{{- trimSuffix "-" (trunc 63 (replace "+" "_" (printf "%s-%s" .Chart.Name .Chart.Version))) }}
+{{- end }}
+
+{{/*
+Metric prefix: the full app name with "-" turned into "_", reduced to its first identifier-like run.
+*/}}
+{{- define "tgi.metricPrefix" -}}
+{{- regexFind "[a-zA-Z_:][a-zA-Z0-9_:]*" (replace "-" "_" (include "tgi.fullname" .)) }}
+{{- end }}
+
+{{/*
+Common labels: chart label, selector labels, app version (when the chart declares one), managing service.
+*/}}
+{{- define "tgi.labels" -}}
+helm.sh/chart: {{ include "tgi.chart" . }}
+{{ include "tgi.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: app name plus release instance, shared by the Deployment, Service and ServiceMonitor selectors.
+*/}}
+{{- define "tgi.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "tgi.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use.
+A chart without a serviceAccount values section is treated as create=false with
+no name override, so including this helper never dereferences a nil map.
+*/}}
+{{- define "tgi.serviceAccountName" -}}
+{{- $sa := default dict .Values.serviceAccount }}
+{{- $generated := ternary (include "tgi.fullname" .) "default" (not (empty $sa.create)) }}
+{{- default $generated $sa.name }}
+{{- end }}
diff --git a/helm-charts/tgi/templates/configmap.yaml b/helm-charts/tgi/templates/configmap.yaml
new file mode 100644
index 000000000..82be971e4
--- /dev/null
+++ b/helm-charts/tgi/templates/configmap.yaml
@@ -0,0 +1,58 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # environment for the TGI container; the Deployment loads it through envFrom
+metadata:
+  name: {{ include "tgi.fullname" . }}-config
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+data:  # every value is emitted as a quoted string
+  MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }}
+  PORT: {{ .Values.port | quote }}
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}  # NOTE(review): token is kept in a ConfigMap, not a Secret
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- if contains "tgi-gaudi" .Values.image.repository }}
+  HABANA_LOGS: "/tmp/habana_logs"  # only for images whose repository contains "tgi-gaudi"
+  {{- end }}
+  NUMBA_CACHE_DIR: "/tmp"  # caches go under /tmp, which the Deployment mounts as an emptyDir
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.MAX_INPUT_LENGTH }}
+  MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }}
+  {{- end }}
+  {{- if .Values.MAX_TOTAL_TOKENS }}
+  MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
+  {{- end }}
+  {{- if .Values.CUDA_GRAPHS }}
+  CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }}
+  {{- end }}
+  {{- if .Values.HF_HUB_DISABLE_PROGRESS_BARS }}
+  HF_HUB_DISABLE_PROGRESS_BARS: {{ .Values.HF_HUB_DISABLE_PROGRESS_BARS | quote }}
+  {{- end }}
+  {{- if .Values.HF_HUB_ENABLE_HF_TRANSFER }}
+  HF_HUB_ENABLE_HF_TRANSFER: {{ .Values.HF_HUB_ENABLE_HF_TRANSFER | quote }}
+  {{- end }}
+  # More options for HPU; like the optional keys above, each one is rendered only when its value is non-empty
+  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
+  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
+  {{- end }}
+  {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
+  PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
+  {{- end }}
+  {{- if .Values.ENABLE_HPU_GRAPH }}
+  ENABLE_HPU_GRAPH: {{ .Values.ENABLE_HPU_GRAPH | quote }}
+  {{- end }}
+  {{- if .Values.LIMIT_HPU_GRAPH }}
+  LIMIT_HPU_GRAPH: {{ .Values.LIMIT_HPU_GRAPH | quote }}
+  {{- end }}
+  {{- if .Values.USE_FLASH_ATTENTION }}
+  USE_FLASH_ATTENTION: {{ .Values.USE_FLASH_ATTENTION | quote }}
+  {{- end }}
+  {{- if .Values.FLASH_ATTENTION_RECOMPUTE }}
+  FLASH_ATTENTION_RECOMPUTE: {{ .Values.FLASH_ATTENTION_RECOMPUTE | quote }}
+  {{- end }}
diff --git a/helm-charts/tgi/templates/deployment.yaml b/helm-charts/tgi/templates/deployment.yaml
new file mode 100644
index 000000000..04436e613
--- /dev/null
+++ b/helm-charts/tgi/templates/deployment.yaml
@@ -0,0 +1,126 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the TGI server container, configured by the "<fullname>-config" ConfigMap
+metadata:
+  name: {{ include "tgi.fullname" . }}
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+spec:
+  {{- if and (ne (int .Values.replicaCount) 1) (not (and .Values.global.monitoring .Values.autoscaling.enabled)) }}
+  # explicit count; skipped for 1 and whenever the HPA is rendered, so upgrades do not reset HPA-managed replicas
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "tgi.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "tgi.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "tgi.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- if .Values.global.modelUseHostPath }}
+            {}  # container securityContext is dropped with a hostPath model dir; presumably so it can write there
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          {{- if .Values.extraCmdArgs }}
+          args:
+            {{- range .Values.extraCmdArgs }}
+            - {{ . | quote }}
+            {{- end }}
+          {{- end }}
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp  # writable scratch space; the ConfigMap points caches and logs here
+              name: tmp
+          ports:
+            - name: http  # port name used by the tcpSocket probes in values.yaml
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: model-volume  # PVC when set, else hostPath when set, else an emptyDir
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: {{ .Values.shmSize }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if not .Values.accelDevice }}
+      # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
+      terminationGracePeriodSeconds: 120
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "tgi.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml b/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml
new file mode 100644
index 000000000..f0338e48d
--- /dev/null
+++ b/helm-charts/tgi/templates/horizontal-pod-autoscaler.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if and .Values.global.monitoring .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler  # rendered only when both global.monitoring and autoscaling.enabled are set
+metadata:
+  name: {{ include "tgi.fullname" . }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "tgi.fullname" . }}
+  minReplicas: 1
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:  # queue size when accelDevice is set, request latency otherwise (CPU)
+  - type: Object
+    object:
+      describedObject:
+        apiVersion: v1
+        # get metric for named object of given type (in same namespace)
+        kind: Service
+        name: {{ include "tgi.fullname" . }}
+      target:
+{{- if .Values.accelDevice }}
+        # Metric is sum from all pods. "AverageValue" divides value returned from
+        # the custom metrics API by the number of Pods before comparing to the target:
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#algorithm-details
+        #  https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/#autoscaling-on-multiple-metrics-and-custom-metrics
+        type: AverageValue
+        averageValue: 15
+      metric:
+        name: {{ include "tgi.metricPrefix" . }}_queue_size_sum
+{{- else }}
+        # Metric is average for all the pods. To avoid replica fluctuation when pod
+        # startup + request processing takes longer than HPA evaluation period, this uses
+        # "Value" (replicas = metric.value / target.value), instead of "AverageValue" type.
+        type: Value
+        value: 4 # seconds
+      metric:
+        name: {{ include "tgi.metricPrefix" . }}_request_latency
+{{- end }}
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 180
+      policies:
+      - type: Percent  # remove at most 25% of the replicas per 90 second period
+        value: 25
+        periodSeconds: 90
+    scaleUp:
+      selectPolicy: Max
+      stabilizationWindowSeconds: 0
+      policies:
+      # Slow linear rampup (one pod per 90 second period) in case additional CPU pods
+      # go to same node (i.e. interfere with each other)
+      - type: Pods
+        value: 1
+        periodSeconds: 90
+      #- type: Percent
+      #  value: 25
+      #  periodSeconds: 90
+{{- end }}
diff --git a/helm-charts/tgi/templates/service.yaml b/helm-charts/tgi/templates/service.yaml
new file mode 100644
index 000000000..011cc37ec
--- /dev/null
+++ b/helm-charts/tgi/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+  name: {{ include "tgi.fullname" . }}
+spec:
+  selector:
+    {{- include "tgi.selectorLabels" . | nindent 4 }}
+  type: {{ .Values.service.type }}
+  ports:
+    - name: tgi  # port name referenced by the ServiceMonitor endpoint
+      protocol: TCP
+      port: 80
+      targetPort: {{ .Values.port }}
diff --git a/helm-charts/tgi/templates/servicemonitor.yaml b/helm-charts/tgi/templates/servicemonitor.yaml
new file mode 100644
index 000000000..978174226
--- /dev/null
+++ b/helm-charts/tgi/templates/servicemonitor.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+# Dashboard for the exposed TGI metrics:
+# - https://grafana.com/grafana/dashboards/19831-text-generation-inference-dashboard/
+# Metric descriptions:
+# - https://github.com/huggingface/text-generation-inference/discussions/1127#discussioncomment-7240527
+
+{{- if .Values.global.monitoring }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    release: {{ .Values.global.prometheusRelease }}  # release name of the Prometheus Helm install
+  name: {{ include "tgi.fullname" . }}
+spec:
+  endpoints:
+    - interval: 5s
+      port: tgi  # named port of the tgi Service
+  selector:
+    matchLabels:
+      {{- include "tgi.selectorLabels" . | nindent 6 }}
+{{- end }}
diff --git a/helm-charts/tgi/templates/tests/test-pod.yaml b/helm-charts/tgi/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..948f23860
--- /dev/null
+++ b/helm-charts/tgi/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod  # Helm test hook: checks that the tgi Service answers a POST to /generate
+metadata:
+  name: "{{ include "tgi.fullname" . }}-testpod"
+  labels:
+    {{- include "tgi.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # NOTE(review): only bash, curl, sleep and echo are used from this image
+      command: ['bash', '-c']
+      args:  # up to 20 attempts, sleeping 10s while curl exits with 7; any other curl failure ends the test at once
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "tgi.fullname" . }}/generate -sS --fail-with-body \
+            -X POST \
+            -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/tgi/values.yaml b/helm-charts/tgi/values.yaml
new file mode 100644
index 000000000..d58db193a
--- /dev/null
+++ b/helm-charts/tgi/values.yaml
@@ -0,0 +1,147 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for the tgi chart (CPU deployment).
+# This is a YAML-formatted file.
+# Values declared here are passed into the chart templates.
+
+replicaCount: 1
+
+# Enabling HPA will:
+# - Hand replica control to HPA (keep replicaCount at 1 so the Deployment omits "replicas")
+# - Add example HPA scaling rules with thresholds suitable for Xeon deployments
+# - Require custom metrics ConfigMap available in the main application chart
+autoscaling:
+  enabled: false
+  maxReplicas: 4
+
+shmSize: '1Gi'
+port: 2080
+
+# Extra TGI launcher arguments (e.g. for performance tuning) go in extraCmdArgs.
+# Launcher reference: https://huggingface.co/docs/text-generation-inference/en/reference/launcher
+# extraCmdArgs: ["--dtype","bfloat16"]
+
+image:
+  repository: 'ghcr.io/huggingface/text-generation-inference'
+  # Optional pull policy, one of Always, IfNotPresent, Never; uncomment to set it.
+  # pullPolicy: ""
+  # Image tag; the templates fall back to the chart appVersion when it is empty.
+  tag: '2.4.0-intel-cpu'
+
+# Accelerator type; leave empty for CPU
+accelDevice: ''
+
+fullnameOverride: ''
+nameOverride: ''
+imagePullSecrets: []
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  runAsUser: 1000
+  runAsNonRoot: true
+  allowPrivilegeEscalation: false
+  readOnlyRootFilesystem: true
+  seccompProfile:
+    type: 'RuntimeDefault'
+  capabilities:
+    drop:
+      - 'ALL'
+
+service:
+  type: 'ClusterIP'
+
+resources: {}
+  # No default resources are set, leaving this as a conscious choice for the user
+  # and improving the odds that the chart runs on small environments such as
+  # Minikube. To set resources, uncomment the lines below, adjust them, and
+  # remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+# TCP probes are used instead of HTTP ones because of bug #483:
+# https://github.com/opea-project/GenAIExamples/issues/483
+livenessProbe:
+  failureThreshold: 24
+  timeoutSeconds: 4
+  periodSeconds: 8
+  initialDelaySeconds: 8
+  tcpSocket:
+    port: 'http'
+readinessProbe:
+  timeoutSeconds: 4
+  periodSeconds: 8
+  initialDelaySeconds: 16
+  tcpSocket:
+    port: 'http'
+startupProbe:
+  failureThreshold: 180
+  timeoutSeconds: 2
+  periodSeconds: 5
+  initialDelaySeconds: 10
+  tcpSocket:
+    port: 'http'
+# livenessProbe:
+#   httpGet:
+#     path: /health
+#     port: http
+#   initialDelaySeconds: 5
+#   periodSeconds: 5
+#   failureThreshold: 24
+# readinessProbe:
+#   httpGet:
+#     path: /health
+#     port: http
+#   initialDelaySeconds: 5
+#   periodSeconds: 5
+# startupProbe:
+#   httpGet:
+#     path: /health
+#     port: http
+#   initialDelaySeconds: 5
+#   periodSeconds: 5
+#   failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+LLM_MODEL_ID: 'Intel/neural-chat-7b-v3-3'
+
+HF_HUB_ENABLE_HF_TRANSFER: '0'
+HF_HUB_DISABLE_PROGRESS_BARS: '1'
+CUDA_GRAPHS: '0'
+MAX_TOTAL_TOKENS: ''
+MAX_INPUT_LENGTH: ''
+
+global:
+  HUGGINGFACEHUB_API_TOKEN: 'insert-your-huggingface-token-here'
+  http_proxy: ''
+  https_proxy: ''
+  no_proxy: ''
+
+  # Where downloaded models are stored:
+  # - modelUseHostPath: a local directory, good for a one node test. Example:
+  #     modelUseHostPath: /mnt/opea-models
+  # - modelUsePVC: a PersistentVolumeClaim (PVC), suitable for multinode deployment. Example:
+  #     modelUsePVC: model-volume
+  # Set only one of them; if both are set the Deployment template mounts the PVC.
+  # With both empty (the default) the model is downloaded into a temporary volume.
+  modelUsePVC: ''
+  modelUseHostPath: ''
+
+  # Install Prometheus serviceMonitor for service
+  monitoring: false
+
+  # Release name of the Prometheus Helm install, used as the serviceMonitor "release" label
+  prometheusRelease: 'prometheus-stack'
diff --git a/helm-charts/tts/.helmignore b/helm-charts/tts/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/tts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/tts/Chart.yaml b/helm-charts/tts/Chart.yaml
new file mode 100644
index 000000000..3d0e1462d
--- /dev/null
+++ b/helm-charts/tts/Chart.yaml
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+type: application
+name: tts
+description: 'The Helm chart for deploying tts as microservice'
+# Chart version, followed by the version of the tts microservice server
+version: '0-latest'
+appVersion: 'v1.0'
+
+dependencies:
+  - name: speecht5
+    condition: speecht5.enabled  # value that switches this dependency on
+    repository: 'file://../speecht5'
+    version: '0-latest'
diff --git a/helm-charts/tts/README.md b/helm-charts/tts/README.md
new file mode 100644
index 000000000..24f3c6cfd
--- /dev/null
+++ b/helm-charts/tts/README.md
@@ -0,0 +1,51 @@
+# tts
+
+Helm chart for deploying tts microservice.
+
+tts depends on speecht5; set `TTS_ENDPOINT` to the speecht5 service endpoint before starting it.
+
+## (Option1): Installing the chart separately
+
+First, you need to install the speecht5 chart, please refer to the [speecht5](../speecht5) chart for more information.
+
+After you've deployed the speecht5 chart successfully, run `kubectl get svc` to get the speecht5 service endpoint, e.g. `http://speecht5:7055`.
+
+To install the tts chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/tts
+export TTS_ENDPOINT="http://speecht5:7055"
+helm dependency update
+helm install tts . --set TTS_ENDPOINT=${TTS_ENDPOINT}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/tts
+helm dependency update
+helm install tts . --set speecht5.enabled=true
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/tts 9088:9088` to expose the tts service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:9088/v1/audio/speech \
+  -XPOST \
+  -d '{"text": "Who are you?"}' \
+  -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key              | Type   | Default      | Description |
+| ---------------- | ------ | ------------ | ----------- |
+| image.repository | string | `"opea/tts"` |             |
+| service.port     | int    | `9088`       |             |
+| TTS_ENDPOINT     | string | `""`         |             |
diff --git a/helm-charts/tts/ci-values.yaml b/helm-charts/tts/ci-values.yaml
new file mode 100644
index 000000000..8eda0bf5b
--- /dev/null
+++ b/helm-charts/tts/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI value overrides for the tts chart.
+# This is a YAML-formatted file.
+# Turns on the speecht5 dependency (see its `condition` in Chart.yaml).
+
+speecht5:
+  enabled: true
diff --git a/helm-charts/tts/templates/_helpers.tpl b/helm-charts/tts/templates/_helpers.tpl
new file mode 100644
index 000000000..22b5e576a
--- /dev/null
+++ b/helm-charts/tts/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name (or .Values.nameOverride when set), cut to 63 characters with a trailing "-" removed.
+*/}}
+{{- define "tts.name" -}}
+{{- trimSuffix "-" (trunc 63 (default .Chart.Name .Values.nameOverride)) }}
+{{- end }}
+
+{{/*
+Fully qualified app name, capped at 63 chars because some Kubernetes name
+fields are limited to this (by the DNS naming spec).
+Resolution order: .Values.fullnameOverride, then the release name alone when
+it already contains the chart name, otherwise "<release>-<chart name>".
+A trailing "-" left after the truncation is removed.
+*/}}
+{{- define "tts.fullname" -}}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- $full := printf "%s-%s" .Release.Name $base }}
+{{- if .Values.fullnameOverride }}
+{{- $full = .Values.fullnameOverride }}
+{{- else if contains $base .Release.Name }}
+{{- $full = .Release.Name }}
+{{- end }}
+{{- trunc 63 $full | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Value of the chart label: "<chart name>-<chart version>" with "+" replaced by "_", capped at 63 chars.
+*/}}
+{{- define "tts.chart" -}}
+{{- trimSuffix "-" (trunc 63 (replace "+" "_" (printf "%s-%s" .Chart.Name .Chart.Version))) }}
+{{- end }}
+
+{{/*
+Common labels: chart label, selector labels, app version (when the chart declares one), managing service.
+*/}}
+{{- define "tts.labels" -}}
+helm.sh/chart: {{ include "tts.chart" . }}
+{{ include "tts.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: app name plus release instance, shared by the Deployment and Service selectors.
+*/}}
+{{- define "tts.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "tts.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use.
+A chart without a serviceAccount values section is treated as create=false with
+no name override, so including this helper never dereferences a nil map.
+*/}}
+{{- define "tts.serviceAccountName" -}}
+{{- $sa := default dict .Values.serviceAccount }}
+{{- $generated := ternary (include "tts.fullname" .) "default" (not (empty $sa.create)) }}
+{{- default $generated $sa.name }}
+{{- end }}
diff --git a/helm-charts/tts/templates/configmap.yaml b/helm-charts/tts/templates/configmap.yaml
new file mode 100644
index 000000000..96fd677e7
--- /dev/null
+++ b/helm-charts/tts/templates/configmap.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # environment for the tts container; the Deployment loads it through envFrom
+metadata:
+  name: {{ include "tts.fullname" . }}-config
+  labels:
+    {{- include "tts.labels" . | nindent 4 }}
+data:
+  {{- if .Values.TTS_ENDPOINT }}
+  TTS_ENDPOINT: {{ .Values.TTS_ENDPOINT | quote}}
+  {{- else }}
+  TTS_ENDPOINT: "http://{{ .Release.Name }}-speecht5:7055"  # fallback; presumably the speecht5 subchart service of this release
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  {{- if and (not .Values.TTS_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-speecht5,{{ .Values.global.no_proxy }}"  # adds the fallback speecht5 host when a proxy is configured
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/tts/templates/deployment.yaml b/helm-charts/tts/templates/deployment.yaml
new file mode 100644
index 000000000..26da197a1
--- /dev/null
+++ b/helm-charts/tts/templates/deployment.yaml
@@ -0,0 +1,90 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment  # runs the tts container, configured by the "<fullname>-config" ConfigMap
+metadata:
+  name: {{ include "tts.fullname" . }}
+  labels:
+    {{- include "tts.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "tts.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "tts.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}  # NOTE(review): the container is named after the release, not the chart
+          envFrom:
+            - configMapRef:
+                name: {{ include "tts.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: tts  # port name used by the httpGet probes in values.yaml
+              containerPort: 9088  # fixed; the Service targetPort uses the same number
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp  # writable emptyDir; the default securityContext makes the root filesystem read-only
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1  # soft spreading across nodes (ScheduleAnyway)
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "tts.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/tts/templates/service.yaml b/helm-charts/tts/templates/service.yaml
new file mode 100644
index 000000000..f2d261e6a
--- /dev/null
+++ b/helm-charts/tts/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    {{- include "tts.labels" . | nindent 4 }}
+  name: {{ include "tts.fullname" . }}
+spec:
+  selector:
+    {{- include "tts.selectorLabels" . | nindent 4 }}
+  type: {{ .Values.service.type }}
+  ports:
+    - name: tts
+      protocol: TCP
+      port: {{ .Values.service.port }}
+      targetPort: 9088  # same number as the containerPort in the Deployment
diff --git a/helm-charts/tts/templates/tests/test-pod.yaml b/helm-charts/tts/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..26fcbcca3
--- /dev/null
+++ b/helm-charts/tts/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod  # Helm test hook: checks that the tts Service answers a POST to /v1/audio/speech
+metadata:
+  name: "{{ include "tts.fullname" . }}-testpod"
+  labels:
+    {{- include "tts.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # NOTE(review): only bash, curl, sleep and echo are used from this image
+      command: ['bash', '-c']
+      args:  # up to 20 attempts, sleeping 10s while curl exits with 7; any other curl failure ends the test at once
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "tts.fullname" . }}:{{ .Values.service.port }}/v1/audio/speech \
+            -XPOST \
+            -d '{"text": "Who are you?"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/tts/values.yaml b/helm-charts/tts/values.yaml
new file mode 100644
index 000000000..39214480c
--- /dev/null
+++ b/helm-charts/tts/values.yaml
@@ -0,0 +1,93 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for tts.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+speecht5:
+  enabled: false
+
+replicaCount: 1
+
+# Set it to a non-empty string, such as "true", to enable the logging facility;
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+TTS_ENDPOINT: ""
+
+image:
+  repository: opea/tts
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for tts service is 9088
+  port: 9088
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: v1/health_check
+    port: tts
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: v1/health_check
+    port: tts
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: v1/health_check
+    port: tts
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/ui/.helmignore b/helm-charts/ui/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/ui/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/ui/Chart.yaml b/helm-charts/ui/Chart.yaml
new file mode 100644
index 000000000..72b9b43fe
--- /dev/null
+++ b/helm-charts/ui/Chart.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: ui
+description: Common Helm chart for the UI for various opea workload
+type: application
+version: 0-latest
+appVersion: "v1.0"
diff --git a/helm-charts/ui/ci-values.yaml b/helm-charts/ui/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/ui/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml b/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml
new file mode 120000
index 000000000..83ad22951
--- /dev/null
+++ b/helm-charts/ui/ci-variant_chatqna_svelte-values.yaml
@@ -0,0 +1 @@
+variant_chatqna_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_codegen_react-values.yaml b/helm-charts/ui/ci-variant_codegen_react-values.yaml
new file mode 120000
index 000000000..dca8edda6
--- /dev/null
+++ b/helm-charts/ui/ci-variant_codegen_react-values.yaml
@@ -0,0 +1 @@
+variant_codegen_react-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_codegen_svelte-values.yaml b/helm-charts/ui/ci-variant_codegen_svelte-values.yaml
new file mode 120000
index 000000000..92b8a5e59
--- /dev/null
+++ b/helm-charts/ui/ci-variant_codegen_svelte-values.yaml
@@ -0,0 +1 @@
+variant_codegen_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml b/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml
new file mode 120000
index 000000000..8034cb024
--- /dev/null
+++ b/helm-charts/ui/ci-variant_codetrans_svelte-values.yaml
@@ -0,0 +1 @@
+variant_codetrans_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_docsum_react-values.yaml b/helm-charts/ui/ci-variant_docsum_react-values.yaml
new file mode 120000
index 000000000..a06c0a483
--- /dev/null
+++ b/helm-charts/ui/ci-variant_docsum_react-values.yaml
@@ -0,0 +1 @@
+variant_docsum_react-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_docsum_svelte-values.yaml b/helm-charts/ui/ci-variant_docsum_svelte-values.yaml
new file mode 120000
index 000000000..c0a3e63ff
--- /dev/null
+++ b/helm-charts/ui/ci-variant_docsum_svelte-values.yaml
@@ -0,0 +1 @@
+variant_docsum_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_faqgen_react-values.yaml b/helm-charts/ui/ci-variant_faqgen_react-values.yaml
new file mode 120000
index 000000000..29846e96d
--- /dev/null
+++ b/helm-charts/ui/ci-variant_faqgen_react-values.yaml
@@ -0,0 +1 @@
+variant_faqgen_react-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml b/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml
new file mode 120000
index 000000000..f1976d3a9
--- /dev/null
+++ b/helm-charts/ui/ci-variant_faqgen_svelte-values.yaml
@@ -0,0 +1 @@
+variant_faqgen_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml b/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml
new file mode 120000
index 000000000..b14d59afd
--- /dev/null
+++ b/helm-charts/ui/ci-variant_visualqna_svelte-values.yaml
@@ -0,0 +1 @@
+variant_visualqna_svelte-values.yaml
\ No newline at end of file
diff --git a/helm-charts/ui/templates/_helpers.tpl b/helm-charts/ui/templates/_helpers.tpl
new file mode 100644
index 000000000..07e404849
--- /dev/null
+++ b/helm-charts/ui/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name, or .Values.nameOverride when set; truncated to 63 characters with any trailing "-" removed.
+*/}}
+{{- define "ui.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name used to name this chart's resources.
+Uses .Values.fullnameOverride when set; otherwise the release name alone if it already contains the chart name, else "<release>-<name>".
+The result is truncated at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "ui.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined as "<name>-<version>" for the helm.sh/chart label; "+" is replaced with "_" and the result is truncated to 63 chars.
+*/}}
+{{- define "ui.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version (only when set) and the managing service.
+*/}}
+{{- define "ui.labels" -}}
+helm.sh/chart: {{ include "ui.chart" . }}
+{{ include "ui.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name and release instance), used by both the Deployment selector and the Service selector.
+*/}}
+{{- define "ui.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "ui.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use. NOTE(review): reads .Values.serviceAccount, which this chart's values.yaml does not define; confirm before including this helper.
+*/}}
+{{- define "ui.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "ui.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/ui/templates/configmap.yaml b/helm-charts/ui/templates/configmap.yaml
new file mode 100644
index 000000000..af6cf6fde
--- /dev/null
+++ b/helm-charts/ui/templates/configmap.yaml
@@ -0,0 +1,47 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Environment variables for the UI container, loaded through envFrom in the
+# Deployment. Each supported UI image reads its backend URL(s) from differently
+# named variables, so the keys are selected by substring match on
+# .Values.image.repository; an unrecognized image aborts rendering.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "ui.fullname" . }}-config
+  labels:
+    {{- include "ui.labels" . | nindent 4 }}
+data:
+  {{- if contains "codegen-react-ui" .Values.image.repository }}
+  VITE_CODE_GEN_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "codegen-ui" .Values.image.repository }}
+  BASIC_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "codetrans-ui" .Values.image.repository }}
+  BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if (contains "docsum-ui" .Values.image.repository) }}
+  DOC_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  BACKEND_SERVICE_ENDPOINT: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if (contains "docsum-react-ui" .Values.image.repository) }}
+  VITE_DOC_SUM_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "chatqna-ui" .Values.image.repository }}
+  CHAT_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  UPLOAD_FILE_BASE_URL: {{ .Values.DATAPREP_SERVICE_ENDPOINT | quote }}
+  GET_FILE: {{ .Values.DATAPREP_GET_FILE_ENDPOINT | quote }}
+  DELETE_FILE: {{ .Values.DATAPREP_DELETE_FILE_ENDPOINT | quote }}
+  {{- else if contains "chatqna-conversation-ui" .Values.image.repository}}
+  APP_BACKEND_SERVICE_ENDPOINT: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  APP_DATA_PREP_SERVICE_URL: {{ .Values.DATAPREP_SERVICE_ENDPOINT | quote }}
+  APP_DATA_PREP_GET_FILE_URL: {{ .Values.DATAPREP_GET_FILE_ENDPOINT | quote }}
+  APP_DATA_PREP_DELETE_FILE_URL: {{ .Values.DATAPREP_DELETE_FILE_ENDPOINT | quote }}
+  {{- else if contains "visualqna-ui" .Values.image.repository }}
+  BACKEND_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "audioqna-ui" .Values.image.repository }}
+  CHAT_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "faqgen-react-ui" .Values.image.repository }}
+  VITE_FAQ_GEN_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else if contains "faqgen-ui" .Values.image.repository }}
+  FAQ_BASE_URL: {{ .Values.BACKEND_SERVICE_ENDPOINT | quote }}
+  {{- else }}
+    {{- /* fail accepts a single message argument, so format it with printf. */}}
+    {{- fail (printf "Unsupported ui image: %s" .Values.image.repository) }}
+  {{- end }}
diff --git a/helm-charts/ui/templates/deployment.yaml b/helm-charts/ui/templates/deployment.yaml
new file mode 100644
index 000000000..5bf8231bb
--- /dev/null
+++ b/helm-charts/ui/templates/deployment.yaml
@@ -0,0 +1,94 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Deployment for the UI container. Its environment comes from the chart's
+# "<fullname>-config" ConfigMap plus, when global.extraEnvConfig is set, an
+# optional extra ConfigMap; /tmp is backed by an emptyDir volume.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "ui.fullname" . }}
+  labels:
+    {{- include "ui.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "ui.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "ui.labels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "ui.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: ui
+              containerPort: {{ .Values.containerPort }}
+              protocol: TCP
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+      volumes:
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      # Spread this release's pods across nodes; the label selector must use this chart's own selector-labels helper.
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "ui.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/ui/templates/service.yaml b/helm-charts/ui/templates/service.yaml
new file mode 100644
index 000000000..0757f090a
--- /dev/null
+++ b/helm-charts/ui/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service  # routes traffic to the pods matched by the ui.selectorLabels helper
+metadata:
+  name: {{ include "ui.fullname" . }}
+  labels:
+    {{- include "ui.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port exposed by the Service, configurable through values
+      targetPort: ui  # named container port declared in the Deployment (.Values.containerPort)
+      protocol: TCP
+      name: ui
+  selector:
+    {{- include "ui.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/ui/templates/tests/test-pod.yaml b/helm-charts/ui/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..5c320d599
--- /dev/null
+++ b/helm-charts/ui/templates/tests/test-pod.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod  # Helm test hook: issues a GET against the ui Service's root URL
+metadata:
+  name: "{{ include "ui.fullname" . }}-testpod"
+  labels:
+    {{- include "ui.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # marks this pod as a Helm test hook
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # presumably chosen because it provides bash and curl — TODO confirm
+      command: ['bash', '-c']
+      args:  # up to 20 attempts; sleeps 10s and retries only on curl exit code 7, any other curl failure exits 1 immediately
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "ui.fullname" . }}:{{ .Values.service.port }} -sS --fail-with-body -X GET && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never  # a failed test run is reported as-is instead of being restarted
diff --git a/helm-charts/ui/values.yaml b/helm-charts/ui/values.yaml
new file mode 100644
index 000000000..4e0beddf4
--- /dev/null
+++ b/helm-charts/ui/values.yaml
@@ -0,0 +1,99 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for ui (the default image is the conversational ChatQnA UI).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+image:
+  repository: opea/chatqna-conversation-ui
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext: {}
+  # Currently comment the securityContext settings
+  # as the inappropriate settings within Conversational UI (need root privilege)
+  # issue: https://github.com/opea-project/GenAIExamples/issues/517
+  # readOnlyRootFilesystem: true
+  # allowPrivilegeEscalation: false
+  # runAsNonRoot: true
+  # runAsUser: 1000
+  # capabilities:
+  #   drop:
+  #   - ALL
+  # seccompProfile:
+  #   type: RuntimeDefault
+
+# internal container port
+# conventional ui container port: 5173, conversational ui container port: 80
+containerPort: 80
+
+service:
+  type: ClusterIP
+  port: 5174
+
+livenessProbe:
+  httpGet:
+    path: /
+    port: ui
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: /
+    port: ui
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /
+    port: ui
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+# chatQnA backend service URL, default to Mega backend service
+BACKEND_SERVICE_ENDPOINT: "/v1/chatqna"
+
+# data preparation service URL, default to Mega data preparation service
+DATAPREP_SERVICE_ENDPOINT: "/v1/dataprep"
+
+# data preparation get file service URL, default to Mega data preparation service
+DATAPREP_GET_FILE_ENDPOINT: "/v1/dataprep/get_file"
+
+# data preparation delete file service URL, default to Mega data preparation service
+DATAPREP_DELETE_FILE_ENDPOINT: "/v1/dataprep/delete_file"
+
+global: {}
diff --git a/helm-charts/ui/variant_chatqna_svelte-values.yaml b/helm-charts/ui/variant_chatqna_svelte-values.yaml
new file mode 100644
index 000000000..ab688a5ab
--- /dev/null
+++ b/helm-charts/ui/variant_chatqna_svelte-values.yaml
@@ -0,0 +1,8 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/chatqna-ui
+  tag: "latest"
+
+containerPort: 5173
diff --git a/helm-charts/ui/variant_codegen_react-values.yaml b/helm-charts/ui/variant_codegen_react-values.yaml
new file mode 100644
index 000000000..37a2ee2b0
--- /dev/null
+++ b/helm-charts/ui/variant_codegen_react-values.yaml
@@ -0,0 +1,8 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/codegen-react-ui
+  tag: "latest"
+
+BACKEND_SERVICE_ENDPOINT: "/v1/codegen"
diff --git a/helm-charts/ui/variant_codegen_svelte-values.yaml b/helm-charts/ui/variant_codegen_svelte-values.yaml
new file mode 100644
index 000000000..8ec0519eb
--- /dev/null
+++ b/helm-charts/ui/variant_codegen_svelte-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/codegen-ui
+  tag: "latest"
+
+containerPort: 5173
+
+BACKEND_SERVICE_ENDPOINT: "/v1/codegen"
diff --git a/helm-charts/ui/variant_codetrans_svelte-values.yaml b/helm-charts/ui/variant_codetrans_svelte-values.yaml
new file mode 100644
index 000000000..fe943ae08
--- /dev/null
+++ b/helm-charts/ui/variant_codetrans_svelte-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/codetrans-ui
+  tag: "latest"
+
+containerPort: 5173
+
+BACKEND_SERVICE_ENDPOINT: "/v1/codetrans"
diff --git a/helm-charts/ui/variant_docsum_react-values.yaml b/helm-charts/ui/variant_docsum_react-values.yaml
new file mode 100644
index 000000000..a28b9acb7
--- /dev/null
+++ b/helm-charts/ui/variant_docsum_react-values.yaml
@@ -0,0 +1,8 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/docsum-react-ui
+  tag: "latest"
+
+BACKEND_SERVICE_ENDPOINT: "/v1/docsum"
diff --git a/helm-charts/ui/variant_docsum_svelte-values.yaml b/helm-charts/ui/variant_docsum_svelte-values.yaml
new file mode 100644
index 000000000..6c9e215f4
--- /dev/null
+++ b/helm-charts/ui/variant_docsum_svelte-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/docsum-ui
+  tag: "latest"
+
+containerPort: 5173
+
+BACKEND_SERVICE_ENDPOINT: "/v1/docsum"
diff --git a/helm-charts/ui/variant_faqgen_react-values.yaml b/helm-charts/ui/variant_faqgen_react-values.yaml
new file mode 100644
index 000000000..4af8aa97e
--- /dev/null
+++ b/helm-charts/ui/variant_faqgen_react-values.yaml
@@ -0,0 +1,8 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/faqgen-react-ui
+  tag: "latest"
+
+BACKEND_SERVICE_ENDPOINT: "/v1/faqgen"
diff --git a/helm-charts/ui/variant_faqgen_svelte-values.yaml b/helm-charts/ui/variant_faqgen_svelte-values.yaml
new file mode 100644
index 000000000..846cee1ad
--- /dev/null
+++ b/helm-charts/ui/variant_faqgen_svelte-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/faqgen-ui
+  tag: "latest"
+
+containerPort: 5173
+
+BACKEND_SERVICE_ENDPOINT: "/v1/faqgen"
diff --git a/helm-charts/ui/variant_visualqna_svelte-values.yaml b/helm-charts/ui/variant_visualqna_svelte-values.yaml
new file mode 100644
index 000000000..2b1aab1d2
--- /dev/null
+++ b/helm-charts/ui/variant_visualqna_svelte-values.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+image:
+  repository: opea/visualqna-ui
+  tag: "latest"
+
+containerPort: 5173
+
+BACKEND_SERVICE_ENDPOINT: "/v1/visualqna"
diff --git a/helm-charts/vllm/.helmignore b/helm-charts/vllm/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/vllm/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/vllm/Chart.yaml b/helm-charts/vllm/Chart.yaml
new file mode 100644
index 000000000..47dacc70b
--- /dev/null
+++ b/helm-charts/vllm/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: vllm
+description: The Helm chart for vLLM Inference Server
+type: application
+version: 0-latest
+# The vLLM version
+appVersion: "0.5"
diff --git a/helm-charts/vllm/README.md b/helm-charts/vllm/README.md
new file mode 100644
index 000000000..0235a7443
--- /dev/null
+++ b/helm-charts/vllm/README.md
@@ -0,0 +1,53 @@
+# vllm
+
+Helm chart for deploying vLLM Inference service.
+
+Refer to [Deploy with Helm Charts](../../README.md) for global guides.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+Note that you cannot use vllm as the service release name due to [environment variables conflict](https://docs.vllm.ai/en/stable/serving/env_vars.html#environment-variables).
+
+```console
+cd GenAIInfra/helm-charts
+export MODELDIR=/mnt/opea-models
+export MODELNAME="Intel/neural-chat-7b-v3-3"
+export HFTOKEN="insert-your-huggingface-token-here"
+helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN}
+# To deploy on Gaudi enabled kubernetes cluster
+# helm install myvllm vllm --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml
+```
+
+By default, the vllm service will download the "Intel/neural-chat-7b-v3-3" model.
+
+If you have already cached the model locally, you can pass it to the container as in this example:
+
+MODELDIR=/mnt/opea-models
+
+MODELNAME="facebook/opt-125m"
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/myvllm 2080:80` to expose the vllm service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+curl http://localhost:2080/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
+```
+
+## Values
+
+| Key                             | Type   | Default                              | Description                                                                                                                                                                                                            |
+| ------------------------------- | ------ | ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| LLM_MODEL_ID                    | string | `"Intel/neural-chat-7b-v3-3"`        | Models id from https://huggingface.co/, or predownloaded model directory                                                                                                                                               |
+| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token                                                                                                                                                                                                 |
+| global.modelUseHostPath         | string | `""`                                 | Cached models directory, vllm will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
+| image.repository                | string | `"opea/vllm"`                        |                                                                                                                                                                                                                        |
+| image.tag                       | string | `"latest"`                           |                                                                                                                                                                                                                        |
diff --git a/helm-charts/vllm/ci-gaudi-values.yaml b/helm-charts/vllm/ci-gaudi-values.yaml
new file mode 120000
index 000000000..7243d31b2
--- /dev/null
+++ b/helm-charts/vllm/ci-gaudi-values.yaml
@@ -0,0 +1 @@
+gaudi-values.yaml
\ No newline at end of file
diff --git a/helm-charts/vllm/ci-values.yaml b/helm-charts/vllm/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/vllm/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/vllm/gaudi-values.yaml b/helm-charts/vllm/gaudi-values.yaml
new file mode 100644
index 000000000..65e622044
--- /dev/null
+++ b/helm-charts/vllm/gaudi-values.yaml
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for vllm.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+image:
+  repository: opea/vllm-gaudi
+  tag: "latest"
+
+# VLLM_CPU_KVCACHE_SPACE: "40"
+OMPI_MCA_btl_vader_single_copy_mechanism: none
+extraCmdArgs: ["--enforce-eager","--tensor-parallel-size","1","--block-size","128","--max-num-seqs","256","--max-seq_len-to-capture","2048"]
+# Workaround for current HPU image with start command /bin/bash
+# extraCmdArgs: ["/bin/bash","-c","python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model Intel/neural-chat-7b-v3-3 --tensor-parallel-size 1 --host 0.0.0.0 --port 2080 --download-dir /data --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"]
+resources:
+  limits:
+    habana.ai/gaudi: 1
diff --git a/helm-charts/vllm/templates/_helpers.tpl b/helm-charts/vllm/templates/_helpers.tpl
new file mode 100644
index 000000000..41b1f8d66
--- /dev/null
+++ b/helm-charts/vllm/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Chart name, or .Values.nameOverride when set; truncated to 63 characters with any trailing "-" removed.
+*/}}
+{{- define "vllm.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name used to name this chart's resources.
+Uses .Values.fullnameOverride when set; otherwise the release name alone if it already contains the chart name, else "<release>-<name>".
+The result is truncated at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "vllm.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart name and version joined as "<name>-<version>" for the helm.sh/chart label; "+" is replaced with "_" and the result is truncated to 63 chars.
+*/}}
+{{- define "vllm.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: the chart label, the selector labels, the app version (only when set) and the managing service.
+*/}}
+{{- define "vllm.labels" -}}
+helm.sh/chart: {{ include "vllm.chart" . }}
+{{ include "vllm.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name and release instance), used by the Deployment selector and its pod template labels.
+*/}}
+{{- define "vllm.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "vllm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Name of the service account to use. NOTE(review): assumes .Values.serviceAccount is defined in the chart values; TODO confirm before including this helper.
+*/}}
+{{- define "vllm.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "vllm.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/vllm/templates/configmap.yaml b/helm-charts/vllm/templates/configmap.yaml
new file mode 100644
index 000000000..14a8ba240
--- /dev/null
+++ b/helm-charts/vllm/templates/configmap.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap  # environment for the vllm container; the Deployment loads it through envFrom
+metadata:
+  name: {{ include "vllm.fullname" . }}-config
+  labels:
+    {{- include "vllm.labels" . | nindent 4 }}
+data:
+  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}}  # NOTE(review): the token is stored in a ConfigMap, not a Secret — confirm this is acceptable
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- if contains "opea/vllm-gaudi" .Values.image.repository }}
+  HABANA_LOGS: "/tmp/habana_logs"  # emitted only when the image repository contains "opea/vllm-gaudi"
+  {{- end }}
+  NUMBA_CACHE_DIR: "/tmp"  # the cache directories here all point under /tmp; presumably the only writable path — TODO confirm
+  HF_HOME: "/tmp/.cache/huggingface"
+  # https://github.com/outlines-dev/outlines/blob/main/outlines/caching.py#L14-L29
+  OUTLINES_CACHE_DIR: "/tmp/.cache/outlines"
+  {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
+  VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}  # optional; omitted when the value is unset or empty
+  {{- end }}
+  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
+  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}}
+  {{- end }}
diff --git a/helm-charts/vllm/templates/deployment.yaml b/helm-charts/vllm/templates/deployment.yaml
new file mode 100644
index 000000000..66b5e3722
--- /dev/null
+++ b/helm-charts/vllm/templates/deployment.yaml
@@ -0,0 +1,132 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Deployment that runs the vLLM server container with model, shared-memory and /tmp volumes.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "vllm.fullname" . }}
+  labels:
+    {{- include "vllm.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "vllm.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "vllm.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "vllm.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          # With global.modelUseHostPath set, an empty securityContext is rendered instead of .Values.securityContext.
+          securityContext:
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          # extraCmdArgs come first, followed by the fixed --model/--host/--port/--download-dir flags.
+          args:
+          {{- if .Values.extraCmdArgs }}
+            {{- range .Values.extraCmdArgs }}
+            - {{ . | quote }}
+            {{- end }}
+          {{- end }}
+            - "--model"
+            - {{ .Values.LLM_MODEL_ID | quote }}
+            - "--host"
+            - "0.0.0.0"
+            - "--port"
+            - {{ .Values.port | quote }}
+            - "--download-dir"
+            - "/data"
+          volumeMounts:
+            - mountPath: /data
+              name: model-volume
+            - mountPath: /dev/shm
+              name: shm
+            - mountPath: /tmp
+              name: tmp
+          ports:
+            - name: http
+              containerPort: {{ .Values.port }}
+              protocol: TCP
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        # model-volume source: global.modelUsePVC first, then global.modelUseHostPath, otherwise an emptyDir.
+        - name: model-volume
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: {{ .Values.shmSize }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      # Soft constraint (ScheduleAnyway): spread pods matching the selector labels across hostnames.
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "vllm.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/vllm/templates/service.yaml b/helm-charts/vllm/templates/service.yaml
new file mode 100644
index 000000000..42e4fab70
--- /dev/null
+++ b/helm-charts/vllm/templates/service.yaml
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "vllm.fullname" . }}
+  labels:
+    {{- include "vllm.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  # The Service always listens on port 80 and forwards to the container port set by .Values.port.
+  ports:
+    - port: 80
+      targetPort: {{ .Values.port }}
+      protocol: TCP
+      name: vllm
+  selector:
+    {{- include "vllm.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/vllm/templates/tests/test-pod.yaml b/helm-charts/vllm/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..6a712e3ed
--- /dev/null
+++ b/helm-charts/vllm/templates/tests/test-pod.yaml
@@ -0,0 +1,32 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Helm test hook: a pod that POSTs a completion request to the vLLM Service.
+# The request is retried (up to 20 attempts, 10s apart) only while curl exits with code 7
+# (curl's "failed to connect" code); any other curl failure, or exhausting the retries, fails the test.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "vllm.fullname" . }}-testpod"
+  labels:
+    {{- include "vllm.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl http://{{ include "vllm.fullname" . }}/v1/completions -sS --fail-with-body \
+            -H "Content-Type: application/json" \
+            -d '{"model": {{ .Values.LLM_MODEL_ID | quote }},"prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/vllm/values.yaml b/helm-charts/vllm/values.yaml
new file mode 100644
index 000000000..fb599ccd3
--- /dev/null
+++ b/helm-charts/vllm/values.yaml
@@ -0,0 +1,100 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for vllm.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+port: 2080
+shmSize: 1Gi
+image:
+  repository: opea/vllm
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext: {}
+#  readOnlyRootFilesystem: true
+#  allowPrivilegeEscalation: false
+#  runAsNonRoot: true
+#  runAsUser: 1000
+#  capabilities:
+#    drop:
+#    - ALL
+#  seccompProfile:
+#    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+extraCmdArgs: ["--enforce-eager", "--dtype", "auto"]
+
+livenessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /health
+    port: http
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+VLLM_CPU_KVCACHE_SPACE: ""
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
+
+  # Choose where to save your downloaded models
+  # Set modelUseHostPath for local directory, this is good for one node test. Example:
+  # modelUseHostPath: /mnt/opea-models
+  # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example:
+  # modelUsePVC: model-volume
+  # Set only one of the following variables; the behavior is undefined if both are set.
+  # By default both are empty, and the model is downloaded and saved to a temporary volume.
+  modelUseHostPath: ""
+  modelUsePVC: ""
diff --git a/helm-charts/web-retriever/.helmignore b/helm-charts/web-retriever/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/web-retriever/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/web-retriever/Chart.yaml b/helm-charts/web-retriever/Chart.yaml
new file mode 100644
index 000000000..5153f480b
--- /dev/null
+++ b/helm-charts/web-retriever/Chart.yaml
@@ -0,0 +1,15 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: web-retriever
+description: The Helm chart for deploying web retriever as microservice
+type: application
+version: 0-latest
+# The web retriever microservice server version
+appVersion: "v1.0"
+dependencies:
+  - name: tei
+    version: 0-latest
+    repository: file://../tei
+    condition: tei.enabled
diff --git a/helm-charts/web-retriever/README.md b/helm-charts/web-retriever/README.md
new file mode 100644
index 000000000..c3aee3a5b
--- /dev/null
+++ b/helm-charts/web-retriever/README.md
@@ -0,0 +1,58 @@
+# web-retriever
+
+Helm chart for deploying Web Retriever microservice.
+
+The web retriever depends on a tei service: either set TEI_EMBEDDING_ENDPOINT to an existing tei endpoint (Option1) or let the chart deploy tei for you (Option2).
+
+## (Option1): Installing the chart separately
+
+First, you need to install the tei chart, please refer to the [tei](../tei) chart for more information.
+
+After you've deployed the tei chart successfully, run `kubectl get svc` to get the tei service endpoint, e.g. `http://tei`.
+
+To install the web-retriever chart, run the following:
+
+```console
+cd GenAIInfra/helm-charts/common/web-retriever
+helm dependency update
+export TEI_EMBEDDING_ENDPOINT="http://tei"
+export GOOGLE_API_KEY="yourownkey"
+export GOOGLE_CSE_ID="yourownid"
+helm install web-retriever . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set GOOGLE_API_KEY=${GOOGLE_API_KEY} --set GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
+```
+
+## (Option2): Installing the chart with dependencies automatically
+
+```console
+cd GenAIInfra/helm-charts/common/web-retriever
+helm dependency update
+export GOOGLE_API_KEY="yourownkey"
+export GOOGLE_CSE_ID="yourownid"
+helm install web-retriever . --set tei.enabled=true --set GOOGLE_API_KEY=${GOOGLE_API_KEY} --set GOOGLE_CSE_ID=${GOOGLE_CSE_ID}
+```
+
+## Verify
+
+To verify the installation, run the command `kubectl get pod` to make sure all pods are running.
+
+Then run the command `kubectl port-forward svc/web-retriever 7077:7077` to expose the web-retriever service for access.
+
+Open another terminal and run the following command to verify that the service is working:
+
+```console
+your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+curl http://localhost:7077/v1/web_retrieval \
+  -X POST \
+  -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \
+  -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key                    | Type   | Default                       | Description |
+| ---------------------- | ------ | ----------------------------- | ----------- |
+| image.repository       | string | `"opea/web-retriever-chroma"` |             |
+| service.port           | string | `"7077"`                      |             |
+| TEI_EMBEDDING_ENDPOINT | string | `""`                          |             |
+| GOOGLE_API_KEY         | string | `""`                          |             |
+| GOOGLE_CSE_ID          | string | `""`                          |             |
diff --git a/helm-charts/web-retriever/ci-values.yaml b/helm-charts/web-retriever/ci-values.yaml
new file mode 100644
index 000000000..f0940ef43
--- /dev/null
+++ b/helm-charts/web-retriever/ci-values.yaml
@@ -0,0 +1,9 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# CI override values for web-retriever: enables the bundled tei dependency.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: true
diff --git a/helm-charts/web-retriever/templates/_helpers.tpl b/helm-charts/web-retriever/templates/_helpers.tpl
new file mode 100644
index 000000000..1bdb3e1b2
--- /dev/null
+++ b/helm-charts/web-retriever/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise the chart name; limited to 63 chars, trailing "-" dropped.
+*/}}
+{{- define "web-retriever.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, capped at 63 chars (the limit of some Kubernetes name fields, per the DNS naming spec).
+Precedence: .Values.fullnameOverride; else the release name alone when it already contains the chart name
+(or .Values.nameOverride); else "<release name>-<name>". A trailing "-" left by truncation is removed.
+*/}}
+{{- define "web-retriever.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+"<chart name>-<chart version>" as used by the helm.sh/chart label: "+" is replaced by "_", then the value is cut to 63 chars.
+*/}}
+{{- define "web-retriever.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (only when the chart sets one) and managed-by.
+*/}}
+{{- define "web-retriever.labels" -}}
+helm.sh/chart: {{ include "web-retriever.chart" . }}
+{{ include "web-retriever.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name and release instance); the Deployment and Service templates use them to match pods.
+*/}}
+{{- define "web-retriever.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "web-retriever.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: .Values.serviceAccount.name when set; otherwise the chart fullname if serviceAccount.create is true, else "default".
+NOTE(review): this chart's values.yaml defines no serviceAccount block - confirm one is supplied before including this helper. */}}
+{{- define "web-retriever.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "web-retriever.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/web-retriever/templates/configmap.yaml b/helm-charts/web-retriever/templates/configmap.yaml
new file mode 100644
index 000000000..a3ac0f9ac
--- /dev/null
+++ b/helm-charts/web-retriever/templates/configmap.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Environment for the web-retriever container; the Deployment loads this ConfigMap through envFrom.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "web-retriever.fullname" . }}-config
+  labels:
+    {{- include "web-retriever.labels" . | nindent 4 }}
+data:
+  # TEI endpoint: the explicit TEI_EMBEDDING_ENDPOINT value when set, otherwise http://<release name>-tei.
+  {{- if .Values.TEI_EMBEDDING_ENDPOINT }}
+  TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote}}
+  {{- else }}
+  TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei"
+  {{- end }}
+  GOOGLE_API_KEY: {{ .Values.GOOGLE_API_KEY | quote }}
+  GOOGLE_CSE_ID: {{ .Values.GOOGLE_CSE_ID | quote }}
+  EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  # When the fallback TEI endpoint is in use and a proxy is configured, <release name>-tei is prepended to no_proxy.
+  {{- if and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }}
+  no_proxy: "{{ .Release.Name }}-tei,{{ .Values.global.no_proxy }}"
+  {{- else }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  {{- end }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  LOGFLAG: {{ .Values.LOGFLAG | quote }}
diff --git a/helm-charts/web-retriever/templates/deployment.yaml b/helm-charts/web-retriever/templates/deployment.yaml
new file mode 100644
index 000000000..307206d3b
--- /dev/null
+++ b/helm-charts/web-retriever/templates/deployment.yaml
@@ -0,0 +1,93 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Deployment that runs the web-retriever container, configured from the <fullname>-config ConfigMap.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "web-retriever.fullname" . }}
+  labels:
+    {{- include "web-retriever.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "web-retriever.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "web-retriever.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}
+          envFrom:
+            - configMapRef:
+                name: {{ include "web-retriever.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true
+            {{- end }}
+          securityContext:
+            {{- toYaml .Values.securityContext | nindent 12 }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          # The container port is fixed at 7077 and named "web-retriever"; the probes in values.yaml refer to it by name.
+          ports:
+            - name: web-retriever
+              containerPort: 7077
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        # NOTE(review): presumably the writable scratch space needed because values.yaml defaults to readOnlyRootFilesystem: true.
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              {{- include "web-retriever.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/web-retriever/templates/service.yaml b/helm-charts/web-retriever/templates/service.yaml
new file mode 100644
index 000000000..93b0b8da7
--- /dev/null
+++ b/helm-charts/web-retriever/templates/service.yaml
@@ -0,0 +1,19 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "web-retriever.fullname" . }}
+  labels:
+    {{- include "web-retriever.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  # Listens on .Values.service.port and forwards to the container's fixed port 7077.
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: 7077
+      protocol: TCP
+      name: web-retriever
+  selector:
+    {{- include "web-retriever.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/web-retriever/templates/tests/test-pod.yaml b/helm-charts/web-retriever/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..a1c0a3aaa
--- /dev/null
+++ b/helm-charts/web-retriever/templates/tests/test-pod.yaml
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Helm test hook: a pod that POSTs a query plus a random 768-element embedding to /v1/web_retrieval.
+# The request is retried (up to 20 attempts, 10s apart) only while curl exits with code 7
+# (curl's "failed to connect" code); any other curl failure, or exhausting the retries, fails the test.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "web-retriever.fullname" . }}-testpod"
+  labels:
+    {{- include "web-retriever.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+    #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14
+      command: ['bash', '-c']
+      args:
+        - |
+          your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)");
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "web-retriever.fullname" . }}:{{ .Values.service.port }}/v1/web_retrieval \
+          -X POST \
+          -d "{\"text\":\"What is OPEA?\",\"embedding\":${your_embedding}}" \
+                -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/web-retriever/values.yaml b/helm-charts/web-retriever/values.yaml
new file mode 100644
index 000000000..e3d254476
--- /dev/null
+++ b/helm-charts/web-retriever/values.yaml
@@ -0,0 +1,95 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for web-retriever.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+tei:
+  enabled: false
+
+replicaCount: 1
+
+# Set it as a non-null string, such as true, if you want to enable logging facility,
+# otherwise, keep it as "" to disable it.
+LOGFLAG: ""
+
+TEI_EMBEDDING_ENDPOINT: ""
+GOOGLE_API_KEY: ""
+GOOGLE_CSE_ID: ""
+
+image:
+  repository: opea/web-retriever-chroma
+  # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:
+  readOnlyRootFilesystem: true
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for the web-retriever service is 7077
+  port: 7077
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:  # all three probes GET /v1/health_check on the container port named "web-retriever"
+  httpGet:
+    path: /v1/health_check
+    port: web-retriever
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24
+readinessProbe:
+  httpGet:
+    path: /v1/health_check
+    port: web-retriever
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /v1/health_check
+    port: web-retriever
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
diff --git a/helm-charts/whisper/.helmignore b/helm-charts/whisper/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/helm-charts/whisper/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/helm-charts/whisper/Chart.yaml b/helm-charts/whisper/Chart.yaml
new file mode 100644
index 000000000..c03ad7a0c
--- /dev/null
+++ b/helm-charts/whisper/Chart.yaml
@@ -0,0 +1,10 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: whisper
+description: The Helm chart for deploying whisper as microservice
+type: application
+version: 0-latest
+# The whisper microservice server version
+appVersion: "v1.0"
diff --git a/helm-charts/whisper/README.md b/helm-charts/whisper/README.md
new file mode 100644
index 000000000..6c1215515
--- /dev/null
+++ b/helm-charts/whisper/README.md
@@ -0,0 +1,32 @@
+# whisper
+
+Helm chart for deploying whisper service.
+
+## Installing the Chart
+
+To install the chart, run the following:
+
+```console
+export MODELDIR=/mnt/opea-models
+export ASR_MODEL_PATH="openai/whisper-small"
+helm install whisper whisper --set global.modelUseHostPath=${MODELDIR} --set ASR_MODEL_PATH=${ASR_MODEL_PATH}
+```
+
+## Verify
+
+Use port-forward to access it from localhost.
+
+```console
+kubectl port-forward service/whisper 1234:7066 &
+curl http://localhost:1234/v1/asr \
+  -XPOST \
+  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+  -H 'Content-Type: application/json'
+```
+
+## Values
+
+| Key              | Type   | Default          | Description |
+| ---------------- | ------ | ---------------- | ----------- |
+| image.repository | string | `"opea/whisper"` |             |
+| service.port     | string | `"7066"`         |             |
diff --git a/helm-charts/whisper/ci-values.yaml b/helm-charts/whisper/ci-values.yaml
new file mode 120000
index 000000000..7d1010096
--- /dev/null
+++ b/helm-charts/whisper/ci-values.yaml
@@ -0,0 +1 @@
+values.yaml
\ No newline at end of file
diff --git a/helm-charts/whisper/gaudi-values.yaml b/helm-charts/whisper/gaudi-values.yaml
new file mode 100644
index 000000000..fec919ad3
--- /dev/null
+++ b/helm-charts/whisper/gaudi-values.yaml
@@ -0,0 +1,14 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Gaudi override values for whisper (Gaudi image and habana.ai/gaudi resource limit).
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+image:
+  repository: opea/whisper-gaudi
+  tag: "latest"
+
+resources:
+  limits:
+    habana.ai/gaudi: 1
diff --git a/helm-charts/whisper/templates/_helpers.tpl b/helm-charts/whisper/templates/_helpers.tpl
new file mode 100644
index 000000000..264e205a4
--- /dev/null
+++ b/helm-charts/whisper/templates/_helpers.tpl
@@ -0,0 +1,62 @@
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise the chart name; limited to 63 chars, trailing "-" dropped.
+*/}}
+{{- define "whisper.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, capped at 63 chars (the limit of some Kubernetes name fields, per the DNS naming spec).
+Precedence: .Values.fullnameOverride; else the release name alone when it already contains the chart name
+(or .Values.nameOverride); else "<release name>-<name>". A trailing "-" left by truncation is removed.
+*/}}
+{{- define "whisper.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+"<chart name>-<chart version>" as used by the helm.sh/chart label: "+" is replaced by "_", then the value is cut to 63 chars.
+*/}}
+{{- define "whisper.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels: chart label, the selector labels, the app version (only when the chart sets one) and managed-by.
+*/}}
+{{- define "whisper.labels" -}}
+helm.sh/chart: {{ include "whisper.chart" . }}
+{{ include "whisper.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels (app name and release instance); the Deployment uses them as its selector and as pod labels.
+*/}}
+{{- define "whisper.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "whisper.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name: .Values.serviceAccount.name when set; otherwise the chart fullname if serviceAccount.create is true, else "default".
+NOTE(review): requires a serviceAccount block in the chart values - verify values.yaml provides one before including this helper. */}}
+{{- define "whisper.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "whisper.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
diff --git a/helm-charts/whisper/templates/configmap.yaml b/helm-charts/whisper/templates/configmap.yaml
new file mode 100644
index 000000000..2e27dea5d
--- /dev/null
+++ b/helm-charts/whisper/templates/configmap.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Environment for the whisper container; the Deployment loads this ConfigMap through envFrom.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "whisper.fullname" . }}-config
+  labels:
+    {{- include "whisper.labels" . | nindent 4 }}
+data:
+  # NOTE(review): EASYOCR_MODULE_PATH is also set by the web-retriever chart; confirm whisper actually needs it.
+  EASYOCR_MODULE_PATH: "/tmp/.EasyOCR"
+  ASR_MODEL_PATH: {{ .Values.ASR_MODEL_PATH | quote }}
+  http_proxy: {{ .Values.global.http_proxy | quote }}
+  https_proxy: {{ .Values.global.https_proxy | quote }}
+  no_proxy: {{ .Values.global.no_proxy | quote }}
+  HF_HOME: "/tmp/.cache/huggingface"
+  {{- if .Values.global.HF_ENDPOINT }}
+  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}}
+  {{- end }}
+  # /data is where the Deployment mounts the model volume.
+  HUGGINGFACE_HUB_CACHE: "/data"
diff --git a/helm-charts/whisper/templates/deployment.yaml b/helm-charts/whisper/templates/deployment.yaml
new file mode 100644
index 000000000..c5d79899c
--- /dev/null
+++ b/helm-charts/whisper/templates/deployment.yaml
@@ -0,0 +1,107 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "whisper.fullname" . }}
+  labels:
+    {{- include "whisper.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "whisper.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "whisper.selectorLabels" . | nindent 8 }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Release.Name }}  # the container is named after the Helm release
+          envFrom:
+            - configMapRef:
+                name: {{ include "whisper.fullname" . }}-config
+            {{- if .Values.global.extraEnvConfig }}
+            - configMapRef:
+                name: {{ .Values.global.extraEnvConfig }}
+                optional: true  # the extra ConfigMap is not required to exist
+            {{- end }}
+          securityContext:  # rendered empty when global.modelUseHostPath is set; presumably for host-directory access - confirm
+            {{- if .Values.global.modelUseHostPath }}
+            {}
+            {{- else }}
+            {{- toYaml .Values.securityContext | nindent 12 }}
+            {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          {{- if .Values.image.pullPolicy }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{- end }}
+          ports:
+            - name: whisper  # the probes in values.yaml refer to this port by name
+              containerPort: 7066
+              protocol: TCP
+          volumeMounts:
+            - mountPath: /data  # HUGGINGFACE_HUB_CACHE in the ConfigMap points here
+              name: model-volume
+            - mountPath: /tmp  # HF_HOME and EASYOCR_MODULE_PATH in the ConfigMap live under /tmp
+              name: tmp
+          {{- if .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml .Values.livenessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml .Values.readinessProbe | nindent 12 }}
+          {{- end }}
+          {{- if .Values.startupProbe }}
+          startupProbe:
+            {{- toYaml .Values.startupProbe | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.resources | nindent 12 }}
+      volumes:
+        - name: model-volume  # source priority: modelUsePVC, then modelUseHostPath, otherwise emptyDir
+          {{- if .Values.global.modelUsePVC }}
+          persistentVolumeClaim:
+            claimName: {{ .Values.global.modelUsePVC }}
+          {{- else if .Values.global.modelUseHostPath }}
+          hostPath:
+            path: {{ .Values.global.modelUseHostPath }}
+            type: Directory  # the directory must already exist on the node
+          {{- else }}
+          emptyDir: {}
+          {{- end }}
+        - name: tmp
+          emptyDir: {}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if .Values.evenly_distributed }}
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway  # soft preference: pods are still scheduled when the skew cannot be met
+          labelSelector:
+            matchLabels:
+              {{- include "whisper.selectorLabels" . | nindent 14 }}
+      {{- end }}
diff --git a/helm-charts/whisper/templates/service.yaml b/helm-charts/whisper/templates/service.yaml
new file mode 100644
index 000000000..42852290f
--- /dev/null
+++ b/helm-charts/whisper/templates/service.yaml
@@ -0,0 +1,18 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "whisper.fullname" . }}
+  labels:
+    {{- include "whisper.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port the Service listens on, set via service.port
+      targetPort: 7066  # fixed: the containerPort declared in the Deployment
+      protocol: TCP
+      name: whisper
+  selector:
+    {{- include "whisper.selectorLabels" . | nindent 4 }}
diff --git a/helm-charts/whisper/templates/tests/test-pod.yaml b/helm-charts/whisper/templates/tests/test-pod.yaml
new file mode 100644
index 000000000..9d6b1de51
--- /dev/null
+++ b/helm-charts/whisper/templates/tests/test-pod.yaml
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "whisper.fullname" . }}-testpod"
+  labels:
+    {{- include "whisper.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test  # run by "helm test", not as part of install/upgrade
+    # "helm.sh/hook-delete-policy": "hook-succeeded,hook-failed"
+spec:
+  containers:
+    - name: curl
+      image: python:3.10.14  # the script below only uses bash and curl from this image
+      command: ['bash', '-c']
+      args:  # POST a small base64 WAV to /v1/asr; retry only while curl exits with 7 (could not connect)
+        - |
+          max_retry=20;
+          for ((i=1; i<=max_retry; i++)); do
+            curl -sS --fail-with-body http://{{ include "whisper.fullname" . }}:{{ .Values.service.port }}/v1/asr \
+            -X POST \
+            -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
+            -H 'Content-Type: application/json' && break;
+            curlcode=$?
+            if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi;
+          done;
+          if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi
+  restartPolicy: Never
diff --git a/helm-charts/whisper/values.yaml b/helm-charts/whisper/values.yaml
new file mode 100644
index 000000000..c368fe669
--- /dev/null
+++ b/helm-charts/whisper/values.yaml
@@ -0,0 +1,96 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Default values for whisper.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+replicaCount: 1
+
+ASR_MODEL_PATH: "openai/whisper-small"  # exported to the container as ASR_MODEL_PATH via the ConfigMap
+
+image:
+  repository: opea/whisper
+  # Uncomment the following line to set the desired image pull policy: one of Always, IfNotPresent, Never.
+  # pullPolicy: ""
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+imagePullSecrets: []
+nameOverride: ""
+fullnameOverride: ""
+
+podAnnotations: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext:  # container-level; the Deployment renders an empty context instead when global.modelUseHostPath is set
+  readOnlyRootFilesystem: false
+  allowPrivilegeEscalation: false
+  runAsNonRoot: true
+  runAsUser: 1000
+  capabilities:
+    drop:
+    - ALL
+  seccompProfile:
+    type: RuntimeDefault
+
+service:
+  type: ClusterIP
+  # The default port for whisper service is 7066
+  port: 7066
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+livenessProbe:
+  httpGet:
+    path: /v1/health
+    port: whisper  # named container port declared in templates/deployment.yaml
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 24  # 24 failures x 5s period: about 2 minutes before a restart
+readinessProbe:
+  httpGet:
+    path: /v1/health
+    port: whisper
+  initialDelaySeconds: 5
+  periodSeconds: 5
+startupProbe:
+  httpGet:
+    path: /v1/health
+    port: whisper
+  initialDelaySeconds: 5
+  periodSeconds: 5
+  failureThreshold: 120  # 120 failures x 5s period: about 10 minutes allowed for startup
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
+
+global:
+  http_proxy: ""
+  https_proxy: ""
+  no_proxy: ""
+
+  # Choose where to save your downloaded models
+  # Set modelUseHostPath for local directory, this is good for one node test. Example:
+  # modelUseHostPath: /mnt/opea-models
+  # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment. Example:
+  # modelUsePVC: model-volume
+  # Set only one of the following variables; the behavior is undefined if both are set.
+  # By default both are empty, and the model is downloaded and saved to a temporary (emptyDir) volume.
+  modelUseHostPath: ""
+  modelUsePVC: ""