diff --git a/ChatQnA/kubernetes/intel/README_gmc.md b/ChatQnA/kubernetes/gmc/README_gmc.md similarity index 100% rename from ChatQnA/kubernetes/intel/README_gmc.md rename to ChatQnA/kubernetes/gmc/README_gmc.md diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_dataprep_gaudi.yaml b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_dataprep_gaudi.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_dataprep_gaudi.yaml diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_dataprep_xeon.yaml b/ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_dataprep_xeon.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_dataprep_xeon.yaml diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_gaudi.yaml b/ChatQnA/kubernetes/gmc/chatQnA_gaudi.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_gaudi.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_gaudi.yaml diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_switch_gaudi.yaml b/ChatQnA/kubernetes/gmc/chatQnA_switch_gaudi.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/hpu/gaudi/gmc/chatQnA_switch_gaudi.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_switch_gaudi.yaml diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_switch_xeon.yaml b/ChatQnA/kubernetes/gmc/chatQnA_switch_xeon.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_switch_xeon.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_switch_xeon.yaml diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_xeon.yaml b/ChatQnA/kubernetes/gmc/chatQnA_xeon.yaml similarity index 100% rename from ChatQnA/kubernetes/intel/cpu/xeon/gmc/chatQnA_xeon.yaml rename to ChatQnA/kubernetes/gmc/chatQnA_xeon.yaml diff --git a/ChatQnA/kubernetes/helm-chart/Chart.yaml b/ChatQnA/kubernetes/helm-chart/Chart.yaml new file mode 100644 index 
0000000000..6a33137a39 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/Chart.yaml @@ -0,0 +1,41 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: chatqna +description: The Helm chart to deploy ChatQnA +type: application +dependencies: + - name: tgi + version: 0-latest + alias: tgi-guardrails + repository: "oci://ghcr.io/opea-project/charts/tgi" + condition: guardrails-usvc.enabled + - name: guardrails-usvc + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/guardrails-usvc" + condition: guardrails-usvc.enabled + - name: tgi + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/tgi" + - name: tei + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/tei" + - name: teirerank + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/teirerank" + - name: redis-vector-db + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/redis-vector-db" + - name: retriever-usvc + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/retriever-usvc" + - name: data-prep + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/data-prep" + - name: ui + alias: chatqna-ui + version: 0-latest + repository: "oci://ghcr.io/opea-project/charts/ui" +version: 0-latest +appVersion: "v1.0" diff --git a/ChatQnA/kubernetes/helm-chart/README.md b/ChatQnA/kubernetes/helm-chart/README.md new file mode 100644 index 0000000000..d3b8097c96 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/README.md @@ -0,0 +1,83 @@ +# ChatQnA + +Helm chart for deploying ChatQnA service. 
ChatQnA depends on the following services: + +- [data-prep](../common/data-prep/README.md) +- [embedding-usvc](../common/embedding-usvc/README.md) +- [tei](../common/tei/README.md) +- [retriever-usvc](../common/retriever-usvc/README.md) +- [redis-vector-db](../common/redis-vector-db/README.md) +- [reranking-usvc](../common/reranking-usvc/README.md) +- [teirerank](../common/teirerank/README.md) +- [llm-uservice](../common/llm-uservice/README.md) +- [tgi](../common/tgi/README.md) + +## Installing the Chart + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/ +./update_dependency.sh +helm dependency update chatqna +export HFTOKEN="insert-your-huggingface-token-here" +export MODELDIR="/mnt/opea-models" +export MODELNAME="Intel/neural-chat-7b-v3-3" +# If you would like to use the traditional UI, please change the image as well as the containerport within the values +# append these at the end of the command "--set chatqna-ui.image.repository=opea/chatqna-ui,chatqna-ui.image.tag=latest,chatqna-ui.containerPort=5173" +helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} +# To use Gaudi device +#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml +# To use Nvidia GPU +#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml +# To include guardrail component in chatqna on Xeon +#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-values.yaml +# To include guardrail component in chatqna on Gaudi +#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set 
global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml +``` + +### IMPORTANT NOTE + +1. Make sure your `MODELDIR` exists on the node where your workload is scheduled so that the downloaded model can be cached for later use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model. + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running. + +Curl command and UI are the two options that can be leveraged to verify the result. + +### Verify the workload through curl command + +Run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the service for access. + +Open another terminal and run the following command to verify that the service is working: + +```console +curl http://localhost:8888/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "What is the revenue of Nike in 2023?"}' +``` + +### Verify the workload through UI + +The UI has already been installed via the Helm chart. To access it, use the external IP of one of your Kubernetes nodes along with the NGINX port. You can find the NGINX port using the following command: + +```bash +export port=$(kubectl get service chatqna-nginx --output='jsonpath={.spec.ports[0].nodePort}') +echo $port +``` + +Open a browser to access `http://<node-external-ip>:${port}` to play with the ChatQnA workload. + +## Values + +| Key | Type | Default | Description | +| ----------------- | ------ | ----------------------------- | -------------------------------------------------------------------------------------- | +| image.repository | string | `"opea/chatqna"` | | +| service.port | string | `"8888"` | | +| tgi.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | +| global.monitoring | bool | `false` | Enable usage metrics for the service components. See ../monitoring.md before enabling! 
| + +## Troubleshooting + +If you encounter any issues, please refer to [ChatQnA Troubleshooting](troubleshooting.md) diff --git a/ChatQnA/kubernetes/helm-chart/ci-gaudi-values.yaml b/ChatQnA/kubernetes/helm-chart/ci-gaudi-values.yaml new file mode 120000 index 0000000000..7243d31b2f --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/ci-gaudi-values.yaml @@ -0,0 +1 @@ +gaudi-values.yaml \ No newline at end of file diff --git a/ChatQnA/kubernetes/helm-chart/ci-guardrails-gaudi-values.yaml b/ChatQnA/kubernetes/helm-chart/ci-guardrails-gaudi-values.yaml new file mode 120000 index 0000000000..088821006f --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/ci-guardrails-gaudi-values.yaml @@ -0,0 +1 @@ +guardrails-gaudi-values.yaml \ No newline at end of file diff --git a/ChatQnA/kubernetes/helm-chart/ci-values.yaml b/ChatQnA/kubernetes/helm-chart/ci-values.yaml new file mode 120000 index 0000000000..7d10100961 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/ChatQnA/kubernetes/helm-chart/cpu-values.yaml b/ChatQnA/kubernetes/helm-chart/cpu-values.yaml new file mode 100644 index 0000000000..b4c5ee5ddb --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/cpu-values.yaml @@ -0,0 +1,109 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Override CPU resource request and probe timing values in specific subcharts +# +# RESOURCES +# +# Resource request matching actual resource usage (with enough slack) +# is important when service is scaled up, so that right amount of pods +# get scheduled to right nodes. +# +# Because resource usage depends on the used devices, model, data type +# and SW versions, and this top-level chart has overrides for them, +# resource requests need to be specified here too. +# +# To test service without resource request, use "resources: {}". 
+# +# PROBES +# +# Inferencing pods startup / warmup takes *much* longer on CPUs than +# with acceleration devices, and their responses are also slower, +# especially when node is running several instances of these services. +# +# Kubernetes restarting pod before its startup finishes, or not +# sending it queries because it's not in ready state due to slow +# readiness responses, does really NOT help in getting faster responses. +# +# => probe timings need to be increased when running on CPU. + +tgi: + # TODO: add Helm value also for TGI data type option: + # https://github.com/opea-project/GenAIExamples/issues/330 + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + + # Potentially suitable values for scaling CPU TGI 2.2 with Intel/neural-chat-7b-v3-3 @ 32-bit: + resources: + limits: + cpu: 8 + memory: 70Gi + requests: + cpu: 6 + memory: 65Gi + + livenessProbe: + initialDelaySeconds: 8 + periodSeconds: 8 + failureThreshold: 24 + timeoutSeconds: 4 + readinessProbe: + initialDelaySeconds: 16 + periodSeconds: 8 + timeoutSeconds: 4 + startupProbe: + initialDelaySeconds: 10 + periodSeconds: 5 + failureThreshold: 180 + timeoutSeconds: 2 + +teirerank: + RERANK_MODEL_ID: "BAAI/bge-reranker-base" + + # Potentially suitable values for scaling CPU TEI v1.5 with BAAI/bge-reranker-base model: + resources: + limits: + cpu: 4 + memory: 30Gi + requests: + cpu: 2 + memory: 25Gi + + livenessProbe: + initialDelaySeconds: 8 + periodSeconds: 8 + failureThreshold: 24 + timeoutSeconds: 4 + readinessProbe: + initialDelaySeconds: 8 + periodSeconds: 8 + timeoutSeconds: 4 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +tei: + EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5" + + # Potentially suitable values for scaling CPU TEI 1.5 with BAAI/bge-base-en-v1.5 model: + resources: + limits: + cpu: 4 + memory: 4Gi + requests: + cpu: 2 + memory: 3Gi + + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 24 + timeoutSeconds: 2 + readinessProbe: 
+ initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 2 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 diff --git a/ChatQnA/kubernetes/helm-chart/gaudi-values.yaml b/ChatQnA/kubernetes/helm-chart/gaudi-values.yaml new file mode 100644 index 0000000000..47df99fc44 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/gaudi-values.yaml @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +# TGI: largest bottleneck for ChatQnA +tgi: + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.6" + resources: + limits: + habana.ai/gaudi: 1 + # higher limits are needed with extra input tokens added by rerank + MAX_INPUT_LENGTH: "2048" + MAX_TOTAL_TOKENS: "4096" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + +# Reranking: second largest bottleneck when reranking is in use +# (i.e. query context docs have been uploaded with data-prep) +teirerank: + accelDevice: "gaudi" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + MAX_WARMUP_SEQUENCE_LENGTH: "512" + image: + repository: ghcr.io/huggingface/tei-gaudi + tag: 1.5.0 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + readOnlyRootFilesystem: false + livenessProbe: + timeoutSeconds: 1 + readinessProbe: + timeoutSeconds: 1 + +# Embedding: Second largest bottleneck without rerank +# By default tei on gaudi is disabled. 
+# tei: +# accelDevice: "gaudi" +# OMPI_MCA_btl_vader_single_copy_mechanism: "none" +# MAX_WARMUP_SEQUENCE_LENGTH: "512" +# image: +# repository: ghcr.io/huggingface/tei-gaudi +# tag: 1.5.0 +# resources: +# limits: +# habana.ai/gaudi: 1 +# securityContext: +# readOnlyRootFilesystem: false +# livenessProbe: +# timeoutSeconds: 1 +# readinessProbe: +# timeoutSeconds: 1 diff --git a/ChatQnA/kubernetes/helm-chart/guardrails-gaudi-values.yaml b/ChatQnA/kubernetes/helm-chart/guardrails-gaudi-values.yaml new file mode 100644 index 0000000000..2d8192abeb --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/guardrails-gaudi-values.yaml @@ -0,0 +1,108 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/chatqna-guardrails + tag: "latest" + +# guardrails related config +guardrails-usvc: + enabled: true + # SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails" + SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + +# gaudi related config +# tei running on CPU by default +# tei: +# accelDevice: "gaudi" +# image: +# repository: ghcr.io/huggingface/tei-gaudi +# tag: 1.5.0 +# resources: +# limits: +# habana.ai/gaudi: 1 +# securityContext: +# readOnlyRootFilesystem: false +# livenessProbe: +# timeoutSeconds: 1 +# readinessProbe: +# timeoutSeconds: 1 + +teirerank: + accelDevice: "gaudi" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + MAX_WARMUP_SEQUENCE_LENGTH: "512" + image: + repository: ghcr.io/huggingface/tei-gaudi + tag: "1.5.0" + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + readOnlyRootFilesystem: false + livenessProbe: + timeoutSeconds: 1 + readinessProbe: + timeoutSeconds: 1 + +tgi: + accelDevice: "gaudi" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.6" + resources: + limits: + habana.ai/gaudi: 1 + # higher limits are needed with extra input tokens added by rerank + MAX_INPUT_LENGTH: "2048" + MAX_TOTAL_TOKENS: "4096" + CUDA_GRAPHS: "" + 
OMPI_MCA_btl_vader_single_copy_mechanism: "none" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + +tgi-guardrails: + accelDevice: "gaudi" + LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" + image: + repository: ghcr.io/huggingface/tgi-gaudi + tag: "2.0.6" + resources: + limits: + habana.ai/gaudi: 1 + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" + CUDA_GRAPHS: "" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + ENABLE_HPU_GRAPH: "true" + LIMIT_HPU_GRAPH: "true" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/ChatQnA/kubernetes/helm-chart/guardrails-values.yaml b/ChatQnA/kubernetes/helm-chart/guardrails-values.yaml new file mode 100644 index 0000000000..8a0603bcc7 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/guardrails-values.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +image: + repository: opea/chatqna-guardrails + tag: "latest" + +# guardrails related config +guardrails-usvc: + enabled: true + # SAFETY_GUARD_ENDPOINT: "http://{{ .Release.Name }}-tgi-guardrails" + SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" +tgi-guardrails: + LLM_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" diff --git a/ChatQnA/kubernetes/helm-chart/hpa-values.yaml b/ChatQnA/kubernetes/helm-chart/hpa-values.yaml new file mode 100644 index 0000000000..a374991f15 --- /dev/null +++ 
b/ChatQnA/kubernetes/helm-chart/hpa-values.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Enable HorizontalPodAutoscaler (HPA) +# +# That will overwrite named PrometheusAdapter configMap with ChatQnA specific +# custom metric queries for embedding, reranking, tgi services. +# +# Default upstream configMap is in: +# - https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/deploy/manifests/config-map.yaml + +autoscaling: + enabled: true + +# Override values in specific subcharts + +# Enabling "autoscaling" for any of the subcharts requires enabling it also above! +tgi: + autoscaling: + maxReplicas: 4 + enabled: true +teirerank: + autoscaling: + maxReplicas: 3 + enabled: true +tei: + autoscaling: + maxReplicas: 2 + enabled: true diff --git a/ChatQnA/kubernetes/helm-chart/nv-values.yaml b/ChatQnA/kubernetes/helm-chart/nv-values.yaml new file mode 100644 index 0000000000..67c4e3ac18 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/nv-values.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# To override values in subchart tgi +tgi: + accelDevice: "nvidia" + image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "2.2.0" + resources: + limits: + nvidia.com/gpu: 1 + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/ChatQnA/kubernetes/helm-chart/templates/_helpers.tpl b/ChatQnA/kubernetes/helm-chart/templates/_helpers.tpl new file mode 100644 index 0000000000..63cc642b98 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "chatqna.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "chatqna.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "chatqna.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "chatqna.labels" -}} +helm.sh/chart: {{ include "chatqna.chart" . }} +{{ include "chatqna.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "chatqna.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chatqna.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "chatqna.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "chatqna.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/ChatQnA/kubernetes/helm-chart/templates/custom-metrics-configmap.yaml b/ChatQnA/kubernetes/helm-chart/templates/custom-metrics-configmap.yaml new file mode 100644 index 0000000000..440a4019e2 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/custom-metrics-configmap.yaml @@ -0,0 +1,86 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if and .Values.global.monitoring .Values.autoscaling.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + # easy to find for the required manual step + namespace: default + name: {{ include "chatqna.fullname" . }}-custom-metrics + labels: + app.kubernetes.io/name: prometheus-adapter +data: + config.yaml: | + rules: + {{- if .Values.tgi.autoscaling.enabled }} + # check metric with: + # kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/ | jq + # + {{- if .Values.tgi.accelDevice }} + - seriesQuery: '{__name__="tgi_queue_size",service="{{ include "tgi.fullname" .Subcharts.tgi }}"}' + # TGI instances queue_size sum + metricsQuery: 'sum by (namespace,service) (tgi_queue_size{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>})' + name: + matches: ^tgi_queue_size + as: "{{ include "tgi.metricPrefix" .Subcharts.tgi }}_queue_size_sum" + {{- else }} + - seriesQuery: '{__name__="tgi_request_inference_duration_sum",service="{{ include "tgi.fullname" .Subcharts.tgi }}"}' + # Average request latency from TGI histograms, over 1 min + # (0.001 divider add is to make sure there's always a valid value) + metricsQuery: 'rate(tgi_request_inference_duration_sum{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(tgi_request_inference_duration_count{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]))' + name: + matches: 
^tgi_request_inference_duration_sum + as: "{{ include "tgi.metricPrefix" .Subcharts.tgi }}_request_latency" + {{- end }} + resources: + # HPA needs both namespace + suitable object resource for its query paths: + # /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/ + # (pod is not suitable object type for matching as each instance has different name) + overrides: + namespace: {resource: "namespace"} + service: {resource: "service"} + {{- end }} + {{- if .Values.teirerank.autoscaling.enabled }} + {{- if .Values.teirerank.accelDevice }} + - seriesQuery: '{__name__="te_queue_size",service="{{ include "teirerank.fullname" .Subcharts.teirerank }}"}' + # TEI instances queue_size sum + metricsQuery: 'sum by (namespace,service) (te_queue_size{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>})' + name: + matches: ^te_queue_size + as: "{{ include "teirerank.metricPrefix" .Subcharts.teirerank }}_queue_size_sum" + {{- else }} + - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "teirerank.fullname" .Subcharts.teirerank }}"}' + # Average request latency from TEI histograms, over 1 min + metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]))' + name: + matches: ^te_request_inference_duration_sum + as: "{{ include "teirerank.metricPrefix" .Subcharts.teirerank }}_request_latency" + {{- end }} + resources: + overrides: + namespace: {resource: "namespace"} + service: {resource: "service"} + {{- end }} + {{- if .Values.tei.autoscaling.enabled }} + {{- if .Values.tei.accelDevice }} + - seriesQuery: '{__name__="te_queue_size",service="{{ include "tei.fullname" .Subcharts.tei }}"}' + # TEI instances queue_size sum + metricsQuery: 'sum by (namespace,service) 
(te_queue_size{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>})' + name: + matches: ^te_queue_size + as: "{{ include "tei.metricPrefix" .Subcharts.tei }}_queue_size_sum" + {{- else }} + - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "tei.fullname" .Subcharts.tei }}"}' + # Average request latency from TEI histograms, over 1 min + metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]))' + name: + matches: ^te_request_inference_duration_sum + as: "{{ include "tei.metricPrefix" .Subcharts.tei }}_request_latency" + {{- end }} + resources: + overrides: + namespace: {resource: "namespace"} + service: {resource: "service"} + {{- end }} +{{- end }} diff --git a/ChatQnA/kubernetes/helm-chart/templates/deployment.yaml b/ChatQnA/kubernetes/helm-chart/templates/deployment.yaml new file mode 100644 index 0000000000..ced873870f --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/deployment.yaml @@ -0,0 +1,101 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chatqna.fullname" . }} + labels: + {{- include "chatqna.labels" . | nindent 4 }} + app: {{ include "chatqna.fullname" . }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "chatqna.selectorLabels" . | nindent 6 }} + app: {{ include "chatqna.fullname" . }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "chatqna.selectorLabels" . | nindent 8 }} + app: {{ include "chatqna.fullname" . }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Release.Name }} + env: + - name: LLM_SERVER_HOST_IP + value: {{ .Release.Name }}-tgi + - name: LLM_SERVER_PORT + value: "80" + - name: LLM_MODEL + value: {{ .Values.tgi.LLM_MODEL_ID | quote }} + - name: RERANK_SERVER_HOST_IP + value: {{ .Release.Name }}-teirerank + - name: RERANK_SERVER_PORT + value: "80" + - name: RETRIEVER_SERVICE_HOST_IP + value: {{ .Release.Name }}-retriever-usvc + - name: RETRIEVER_SERVICE_PORT + value: "7000" + - name: EMBEDDING_SERVER_HOST_IP + value: {{ .Release.Name }}-tei + - name: EMBEDDING_SERVER_PORT + value: "80" + - name: GUARDRAIL_SERVICE_HOST_IP + value: {{ .Release.Name }}-guardrails-usvc + - name: GUARDRAIL_SERVICE_PORT + value: "9090" + {{- if .Values.LOGFLAG }} + - name: LOGFLAG + value: {{ .Values.LOGFLAG | quote }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + volumeMounts: + - mountPath: /tmp + name: tmp + ports: + - name: chatqna + containerPort: {{ .Values.port }} + protocol: TCP + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumes: + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "chatqna.selectorLabels" . | nindent 14 }} + app: {{ include "chatqna.fullname" . 
}} + {{- end }} diff --git a/ChatQnA/kubernetes/helm-chart/templates/nginx-deployment.yaml b/ChatQnA/kubernetes/helm-chart/templates/nginx-deployment.yaml new file mode 100644 index 0000000000..8b5c897de0 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/nginx-deployment.yaml @@ -0,0 +1,130 @@ +apiVersion: v1 +data: + default.conf: |+ + # Copyright (C) 2024 Intel Corporation + # SPDX-License-Identifier: Apache-2.0 + + + server { + listen 80; + listen [::]:80; + + location /home { + alias /usr/share/nginx/html/index.html; + } + + proxy_connect_timeout 600; + proxy_send_timeout 600; + proxy_read_timeout 600; + send_timeout 600; + client_max_body_size 10G; + + location / { + proxy_pass http://{{ include "ui.fullname" (index .Subcharts "chatqna-ui") }}:{{ index .Values "chatqna-ui" "service" "port" }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/chatqna { + proxy_pass http://{{ include "chatqna.fullname" . 
}}:{{ .Values.service.port }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_buffering off; + proxy_cache off; + proxy_request_buffering off; + gzip off; + } + + location /v1/dataprep { + proxy_pass http://{{ include "data-prep.fullname" (index .Subcharts "data-prep") }}:{{ index .Values "data-prep" "service" "port" }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/dataprep/get_file { + proxy_pass http://{{ include "data-prep.fullname" (index .Subcharts "data-prep") }}:{{ index .Values "data-prep" "service" "port" }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + location /v1/dataprep/delete_file { + proxy_pass http://{{ include "data-prep.fullname" (index .Subcharts "data-prep") }}:{{ index .Values "data-prep" "service" "port" }}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } + +kind: ConfigMap +metadata: + name: {{ include "chatqna.fullname" . }}-nginx-config +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chatqna.fullname" . }}-nginx + labels: + {{- include "chatqna.labels" . | nindent 4 }} + app: {{ include "chatqna.fullname" . }}-nginx +spec: + selector: + matchLabels: + {{- include "chatqna.selectorLabels" . | nindent 6 }} + app: {{ include "chatqna.fullname" . }}-nginx + template: + metadata: + labels: + {{- include "chatqna.selectorLabels" . | nindent 8 }} + app: {{ include "chatqna.fullname" . 
}}-nginx + spec: + containers: + - image: nginx:1.27.1 + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + name: nginx + volumeMounts: + - mountPath: /etc/nginx/conf.d + name: nginx-config-volume + securityContext: {} + volumes: + - configMap: + defaultMode: 420 + name: {{ include "chatqna.fullname" . }}-nginx-config + name: nginx-config-volume + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chatqna.fullname" . }}-nginx +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + {{- include "chatqna.selectorLabels" . | nindent 4 }} + app: {{ include "chatqna.fullname" . }}-nginx + type: {{ .Values.nginx.service.type }} diff --git a/ChatQnA/kubernetes/helm-chart/templates/service.yaml b/ChatQnA/kubernetes/helm-chart/templates/service.yaml new file mode 100644 index 0000000000..83e46afc38 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/service.yaml @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chatqna.fullname" . }} + labels: + {{- include "chatqna.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.port }} + protocol: TCP + name: chatqna + selector: + {{- include "chatqna.selectorLabels" . | nindent 4 }} + app: {{ include "chatqna.fullname" . 
}} diff --git a/ChatQnA/kubernetes/helm-chart/templates/servicemonitor.yaml b/ChatQnA/kubernetes/helm-chart/templates/servicemonitor.yaml new file mode 100644 index 0000000000..062c1b6684 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "chatqna.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "chatqna.selectorLabels" . | nindent 6 }} + endpoints: + - port: chatqna + interval: 5s +{{- end }} diff --git a/ChatQnA/kubernetes/helm-chart/templates/tests/test-pod.yaml b/ChatQnA/kubernetes/helm-chart/templates/tests/test-pod.yaml new file mode 100644 index 0000000000..18c4246c6e --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/templates/tests/test-pod.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "chatqna.fullname" . }}-testpod" + labels: + {{- include "chatqna.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test + #"helm.sh/hook-delete-policy": "hook-succeeded, hook-failure" +spec: + containers: + - name: curl + image: python:3.10.14 + command: ['bash', '-c'] + args: + - | + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "chatqna.fullname" . }}:{{ .Values.service.port }}/v1/chatqna -sS --fail-with-body \ + -d '{"messages": "What is the machine learning?"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/ChatQnA/kubernetes/helm-chart/troubleshooting.md b/ChatQnA/kubernetes/helm-chart/troubleshooting.md new file mode 100644 index 0000000000..7dbffa9e5a --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/troubleshooting.md @@ -0,0 +1,153 @@ +# ChatQnA Troubleshooting + +After deploying ChatQnA with the Helm chart, use the following commands to check whether each service is working properly. +These commands walk through each step of how RAG works with the LLM. + +## a function to get the endpoint of service + +This is a base helper used to get the service endpoint of each ChatQnA component. + +```bash +svc_endpoint() { + endpoint=$(kubectl -n ${2:-default} get svc -l ${1} -o jsonpath='{.items[0].spec.clusterIP}:{.items[0].spec.ports[0].port}') + echo "${endpoint}" +} +``` + +## define the namespace of service + +Please specify the namespace of ChatQnA; it will be **default** if not defined. + +``` +# define your namespace +ns=opea-chatqna +``` + +Check the available namespaces with: + +```console +kubectl get ns +NAME STATUS AGE +calico-system Active 21d +cert-manager Active 21d +default Active 21d +kube-public Active 21d +kube-system Active 21d +nfd Active 21d +observability Active 21d +opea-chatqna Active 21d +openebs Active 21d +orchestrator-system Active 21d +tigera-operator Active 21d +``` + +## Update a file to database + +This step will upload a PDF about Nike revenue information to the vector database.
+ +```bash +# data-prep +label='app.kubernetes.io/name=data-prep' + +wget https://raw.githubusercontent.com/opea-project/GenAIComps/refs/heads/main/comps/retrievers/redis/data/nke-10k-2023.pdf + +endpoint=$(svc_endpoint ${label} ${ns}) +echo $endpoint +curl -x "" -X POST "http://${endpoint}/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" +``` + +> **_NOTE:_** Get the service labels with: +> +> ```bash +> kubectl get -n ${ns} svc -o json | jq .items[].metadata.labels +> ``` +> +> You can use **grep** to filter the labels by key. + +## get the embedding of input + +This step will get the embedding of your input/question. + +```bash +label='app.kubernetes.io/name=tei' +input="What is the revenue of Nike in 2023?" + +endpoint=$(svc_endpoint ${label} ${ns}) +echo $endpoint + +your_embedding=$(curl -x "" http://${endpoint}/embed \ + -X POST \ + -d '{"inputs":"'"$input"'"}' \ + -H 'Content-Type: application/json' |jq .[0] -c) +``` + +## get the retriever docs + +This step will retrieve the docs related to your input/question. + +```bash +label='app.kubernetes.io/name=retriever-usvc' +text=$input + +endpoint=$(svc_endpoint ${label} ${ns}) +echo $endpoint + +retrieved_docs=$(curl -x "" http://${endpoint}/v1/retrieval \ + -X POST \ + -d "{\"text\":\"${text}\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' | jq -c .retrieved_docs) +``` + +## reranking the docs + +This step will rerank the retrieved docs to find the ones most relevant to your input/question.
+ +```bash +label='app.kubernetes.io/name=reranking-usvc' +query=$input + +endpoint=$(svc_endpoint ${label} ${ns}) +echo $endpoint + +reranking_docs=$(curl -x "" http://${endpoint}/v1/reranking \ + -X POST \ + -d '{"initial_query":"'"$query"'", "retrieved_docs": '"$retrieved_docs"'}' \ + -H 'Content-Type: application/json' | jq -c .documents[0]) + +# replace escaped double quotes with spaces, then strip the remaining double quotes +reranking_docs=$(sed 's/\\"/ /g' <<< "${reranking_docs}") +reranking_docs=$(tr -d '"' <<< "${reranking_docs}") +``` + +## TGI Q and A + +This step will generate the answer to your question. + +```bash +label='app.kubernetes.io/name=tgi' + +endpoint=$(svc_endpoint ${label} ${ns}) +echo $endpoint + +# your question +query=${input} +# inputs template. +inputs="### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results: ${reranking_docs} ### Question: ${query} \n\n### Answer:" + +curl -x "" http://${endpoint}/generate \ + -X POST \ + -d '{"inputs":"'"${inputs}"'","parameters":{"max_new_tokens":1024, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +Example output: + +```console +{"generated_text":" In fiscal 2023, NIKE, Inc. achieved record Revenues of $51.2 billion."} +``` + +## REF + +[Build Mega Service of ChatQnA on Xeon](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker_compose/intel/cpu/xeon/README.md) diff --git a/ChatQnA/kubernetes/helm-chart/values.yaml b/ChatQnA/kubernetes/helm-chart/values.yaml new file mode 100644 index 0000000000..0082c2ab02 --- /dev/null +++ b/ChatQnA/kubernetes/helm-chart/values.yaml @@ -0,0 +1,79 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for chatqna. +# This is a YAML-formatted file.
+# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: opea/chatqna + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: "" + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +port: 8888 +service: + type: ClusterIP + port: 8888 + +nginx: + service: + type: NodePort + +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# This is just to avoid Helm errors when HPA is NOT used +# (use hpa-values.yaml files to actually enable HPA). +autoscaling: + enabled: false + +# Override values in specific subcharts +tgi: + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + +# disable guardrails-usvc by default +# See guardrails-values.yaml for guardrail related options +guardrails-usvc: + enabled: false + +# If you would like to switch to traditional UI image +# Uncomment the following lines +# chatqna-ui: +# image: +# repository: opea/chatqna-ui +# tag: "latest" +# containerPort: "5173" + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # set modelUseHostPath or modelUsePVC to use model cache. 
+ modelUseHostPath: "" + # modelUseHostPath: /mnt/opea-models + # modelUsePVC: model-volume + + # Install Prometheus serviceMonitors for service components + monitoring: false + + # Prometheus Helm install release name needed for serviceMonitors + prometheusRelease: prometheus-stack diff --git a/ChatQnA/kubernetes/intel/README.md b/ChatQnA/kubernetes/intel/README.md deleted file mode 100644 index 191e71b2b2..0000000000 --- a/ChatQnA/kubernetes/intel/README.md +++ /dev/null @@ -1,99 +0,0 @@ -# Deploy ChatQnA in Kubernetes Cluster - -> [NOTE] -> The following values must be set before you can deploy: -> HUGGINGFACEHUB_API_TOKEN -> -> You can also customize the "MODEL_ID" if needed. -> -> You need to make sure you have created the directory `/mnt/opea-models` to save the cached model on the node where the ChatQnA workload is running. Otherwise, you need to modify the `chatqna.yaml` file to change the `model-volume` to a directory that exists on the node. -> -> File upload size limit: The maximum size for uploaded files is 10GB. - -## Deploy On Xeon - -``` -cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna.yaml -kubectl apply -f chatqna.yaml -``` - -Newer CPUs such as Intel Cooper Lake, Sapphire Rapids, support [`bfloat16` data type](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). If you have such CPUs, and given model supports `bfloat16`, adding `--dtype bfloat16` argument for `huggingface/text-generation-inference` server halves its memory usage and speeds it a bit. 
To use it, run the following commands: - -``` -# label your node for scheduling the service on it automatically -kubectl label node 'your-node-name' node-type=node-bfloat16 - -# add `nodeSelector` for the `huggingface/text-generation-inference` server at `chatqna_bf16.yaml` -# create -kubectl apply -f chatqna_bf16.yaml -``` - -## Deploy On Gaudi - -``` -cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna.yaml -kubectl apply -f chatqna.yaml -``` - -## Deploy on Xeon with Remote LLM Model - -``` -cd GenAIExamples/ChatQnA/kubernetes/intel/cpu/xeon/manifest -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -export vLLM_ENDPOINT="Your Remote Inference Endpoint" -sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna-remote-inference.yaml -sed -i "s|insert-your-remote-inference-endpoint|${vLLM_ENDPOINT}|g" chatqna-remote-inference.yaml -``` - -### Additional Steps for Remote Endpoints with Authentication (If No Authentication Skip This Step) - -If your remote inference endpoint is protected with OAuth Client Credentials authentication, update CLIENTID, CLIENT_SECRET and TOKEN_URL with the correct values in "chatqna-llm-uservice-config" ConfigMap - - - -### Deploy -``` -kubectl apply -f chatqna-remote-inference.yaml -``` - -## Deploy on Gaudi with TEI, Rerank, and vLLM Models Running Remotely - -``` -cd GenAIExamples/ChatQnA/kubernetes/intel/hpu/gaudi/manifest -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -export vLLM_ENDPOINT="Your Remote Inference Endpoint" -export TEI_EMBEDDING_ENDPOINT="Your Remote TEI Embedding Endpoint" -export TEI_RERANKING_ENDPOINT="Your Remote Reranking Endpoint" - -sed -i "s|insert-your-huggingface-token-here|${HUGGINGFACEHUB_API_TOKEN}|g" chatqna-vllm-remote-inference.yaml -sed -i "s|insert-your-remote-vllm-inference-endpoint|${vLLM_ENDPOINT}|g" 
chatqna-vllm-remote-inference.yaml -sed -i "s|insert-your-remote-embedding-endpoint|${TEI_EMBEDDING_ENDPOINT}|g" chatqna-vllm-remote-inference.yaml -sed -i "s|insert-your-remote-reranking-endpoint|${TEI_RERANKING_ENDPOINT}|g" chatqna-vllm-remote-inference.yaml -``` - -### Additional Steps for Remote Endpoints with Authentication (If No Authentication Skip This Step) - -If your remote inference endpoint is protected with OAuth Client Credentials authentication, update CLIENTID, CLIENT_SECRET and TOKEN_URL with the correct values in "chatqna-llm-uservice-config", "chatqna-data-prep-config", "chatqna-embedding-usvc-config", "chatqna-reranking-usvc-config", "chatqna-retriever-usvc-config" ConfigMaps - -### Deploy -``` -kubectl apply -f chatqna-vllm-remote-inference.yaml -``` - -## Verify Services - -To verify the installation, run the command `kubectl get pod` to make sure all pods are running. - -Then run the command `kubectl port-forward svc/chatqna 8888:8888` to expose the ChatQnA service for access. - -Open another terminal and run the following command to verify the service if working: - -```console -curl http://localhost:8888/v1/chatqna \ - -H 'Content-Type: application/json' \ - -d '{"messages": "What is the revenue of Nike in 2023?"}' -``` diff --git a/ChatQnA/kubernetes/intel/README_single_node.md b/ChatQnA/kubernetes/intel/README_single_node.md deleted file mode 100644 index 7d98d12bf3..0000000000 --- a/ChatQnA/kubernetes/intel/README_single_node.md +++ /dev/null @@ -1,53 +0,0 @@ -# Deploy ChatQnA in Kubernetes Cluster on Single Node environment (Minikube) - -The following instructions are to deploy the ChatQnA example on a single Node using Kubernetes for testing purposes. -## Minikube setup -1. Install [Minikube](https://minikube.sigs.k8s.io/docs/start/) following the quickstart guide -2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/) -3. 
Build the container images, following the steps under "Build Docker Images" section in the [docker-compose README](../../docker_compose/intel/cpu/xeon/README.md) to checkout [GenAIComps](https://github.com/opea-project/GenAIComps.git) and build other images with your changes for development. -```bash -# Example on building frontend Docker image -cd GenAIExamples/ChatQnA/ui -docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . -# etc... -``` -The built images should be visible in the local Docker registry. Other images which have not been built with your changes (or not present in your local Docker registry) will be pulled from [docker hub](https://hub.docker.com/u/opea) by Minikube later in step 6. -```bash -docker images | grep opea -# REPOSITORY TAG IMAGE ID CREATED SIZE -# opea/chatqna-ui latest 8f2fa2523b85 6 days ago 1.56GB -# opea/chatqna latest 7f2602a7a266 6 days ago 821MB -# ... -``` -4. The built images must be imported into the Minikube registry from the local Docker registry. This can be done using `minikube load `image. -```bash -minikube image load opea/chatqna -minikube image load opea/chatqna-ui -# etc... -``` -5. Start the minikube cluster with `minikube start`, check that the minikube container (kicbase) is up with `docker ps` -```bash -docker ps -# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -# de088666cef2 gcr.io/k8s-minikube/kicbase:v0.0.45 "/usr/local/bin/entr…" 2 days ago Up 2 days 127.0.0.1:49157->22/tcp... minikube -``` -6. Deploy the ChatQnA application with `kubectl apply -f chatqna.yaml`, check that the opea pods are in a running state with `kubectl get pods` -```bash -kubectl get pods -# NAME READY STATUS RESTARTS AGE -# chatqna-78b4f5865-qbzms 1/1 Running 0 2d3h -# chatqna-chatqna-ui-54c8dfb6cf-fll5g 1/1 Running 0 2d3h -# etc... -``` - -7. 
Forward the port of the chatqna service from Minikube to the host, and test the service as you would a normal k8s cluster deployment -```bash -# port-forward to expose the chatqna endpoint from within the minikube cluster -kubectl port-forward svc/chatqna 8888:8888 -curl http://localhost:8888/v1/chatqna \ - -H 'Content-Type: application/json' \ - -d '{"messages": "What is the revenue of Nike in 2023?"}' - -# Similarly port-forward to expose the chatqna-ui endpoint and use the UI at :5173 in your browser -kubectl port-forward svc/chatqna-chatqna-ui 5173:5173 -``` diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml deleted file mode 100644 index 7265ebff5d..0000000000 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-guardrails.yaml +++ /dev/null @@ -1,1327 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-guardrails-usvc-config - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - 
app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - SAFETY_GUARD_ENDPOINT: "http://chatqna-tgi-guardrails" - SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - LOGFLAG: "" - http_proxy: "" - https_proxy: "" - no_proxy: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 
-kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi-guardrails/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-guardrails-config - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" - PORT: "2083" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - CUDA_GRAPHS: "0" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-config - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "Intel/neural-chat-7b-v3-3" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - CUDA_GRAPHS: "0" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation 
- # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/guardrails-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-guardrails-usvc - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9090 - targetPort: 9090 - protocol: TCP - name: guardrails-usvc - selector: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: 
redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Source: 
chatqna/charts/tgi-guardrails/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi-guardrails - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2083 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: 
chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: 
true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - 
startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-guardrails-usvc - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-guardrails-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/guardrails-tgi:latest" - imagePullPolicy: Always - ports: - - name: guardrails-usvc - containerPort: 9090 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: 
chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - 
app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - 
app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi-guardrails/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi-guardrails - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - 
app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi-guardrails - envFrom: - - configMapRef: - name: chatqna-tgi-guardrails-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2083 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi - envFrom: - - configMapRef: - name: chatqna-tgi-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: 
true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVER_HOST_IP - value: chatqna-tgi - - name: RERANK_SERVER_HOST_IP - value: chatqna-teirerank - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVER_HOST_IP - value: chatqna-tei - - name: GUARDRAIL_SERVICE_HOST_IP - value: chatqna-guardrails-usvc - - name: GUARDRAIL_SERVICE_PORT - value: "9090" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - 
image: "opea/chatqna-guardrails:latest" - imagePullPolicy: Always - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml deleted file mode 100644 index 5778132686..0000000000 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml +++ /dev/null @@ -1,1323 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - 
KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-embedding-usvc-config - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/llm-uservice/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-llm-uservice-config - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - vLLM_ENDPOINT: "insert-your-remote-inference-endpoint" - LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct" - LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct" - MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" ---- -# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-reranking-usvc-config - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: 
chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - 
proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/embedding-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6000 - targetPort: 6000 - protocol: TCP - name: embedding-usvc - selector: - app.kubernetes.io/name: embedding-usvc - 
app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/llm-uservice/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - name: llm-uservice - selector: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/reranking-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8000 - targetPort: 8000 - protocol: TCP - name: reranking-usvc - selector: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: 
chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- - -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - 
app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - 
app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - 
spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-embedding-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/embedding-tei:latest" - imagePullPolicy: IfNotPresent - ports: - - name: embedding-usvc - containerPort: 6000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/llm-uservice/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-llm-uservice-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/llm-vllm:latest" - 
imagePullPolicy: IfNotPresent - ports: - - name: llm-uservice - containerPort: 9000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - 
periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-reranking-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/reranking-tei:latest" - imagePullPolicy: IfNotPresent - ports: - - name: reranking-usvc - containerPort: 8000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: 
apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: 
- app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - 
app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- - -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVICE_HOST_IP - value: chatqna-llm-uservice - - name: RERANK_SERVICE_HOST_IP - 
value: chatqna-reranking-usvc - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVICE_HOST_IP - value: chatqna-embedding-usvc - - name: MODEL_ID - value: "meta-llama/Meta-Llama-3.1-8B-Instruct" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna-wrapper:latest" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml deleted file mode 100644 index 26813816ed..0000000000 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna.yaml +++ /dev/null @@ -1,1065 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: 
ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: 
"/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-config - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "Intel/neural-chat-7b-v3-3" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - CUDA_GRAPHS: "0" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - 
proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 
- app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: 
"cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: 
"v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - 
containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - 
mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: 
v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: 
chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi - envFrom: - - configMapRef: - name: chatqna-tgi-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - 
app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVER_HOST_IP - value: chatqna-tgi - - name: RERANK_SERVER_HOST_IP - value: chatqna-teirerank - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVER_HOST_IP - value: chatqna-tei - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna:latest" - imagePullPolicy: Always - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml deleted file mode 100644 index aac57140b7..0000000000 --- 
a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna_bf16.yaml +++ /dev/null @@ -1,1068 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-config - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "Intel/neural-chat-7b-v3-3" - DTYPE: "bfloat16" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - CUDA_GRAPHS: "0" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - 
client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui 
- app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: 
chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# 
Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: 
redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: 
/tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - 
periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - 
periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - spec: - nodeSelector: - node-type: node-bfloat16 - securityContext: - {} - containers: - - name: tgi - envFrom: - - configMapRef: - name: chatqna-tgi-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVER_HOST_IP - value: chatqna-tgi - - name: RERANK_SERVER_HOST_IP - value: chatqna-teirerank - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVER_HOST_IP - value: chatqna-tei - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna:latest" - imagePullPolicy: Always - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: 
nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml deleted file mode 100644 index a96b7ad7fb..0000000000 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-guardrails.yaml +++ /dev/null @@ -1,1332 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-guardrails-usvc-config - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - SAFETY_GUARD_ENDPOINT: "http://chatqna-tgi-guardrails" - SAFETY_GUARD_MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - LOGFLAG: "" - http_proxy: "" - https_proxy: "" - no_proxy: "" ---- -# Source: 
chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: 
"/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi-guardrails/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-guardrails-config - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B" - PORT: "2083" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - MAX_INPUT_LENGTH: "1024" - MAX_TOTAL_TOKENS: "2048" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-config - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "Intel/neural-chat-7b-v3-3" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - MAX_INPUT_LENGTH: "1024" - MAX_TOTAL_TOKENS: "2048" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass 
http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 
2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/guardrails-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-guardrails-usvc - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9090 - targetPort: 9090 - protocol: TCP - name: guardrails-usvc - selector: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi-guardrails/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi-guardrails - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: 
"2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2083 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: 
chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 
5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-guardrails-usvc - labels: - helm.sh/chart: guardrails-usvc-1.0.0 - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: guardrails-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-guardrails-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/guardrails-tgi:latest" - imagePullPolicy: Always - ports: - - name: guardrails-usvc - containerPort: 9090 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: guardrails-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# 
Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm 
-spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: 
chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:1.5.0" - imagePullPolicy: IfNotPresent - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - 
readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi-guardrails/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi-guardrails - labels: - helm.sh/chart: tgi-guardrails-1.0.0 - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi-guardrails - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi-guardrails - envFrom: - - configMapRef: - name: chatqna-tgi-guardrails-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: 
RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2083 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi - envFrom: - - configMapRef: - name: chatqna-tgi-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: 
http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVER_HOST_IP - value: chatqna-tgi - - name: RERANK_SERVER_HOST_IP - value: chatqna-teirerank - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVER_HOST_IP - value: chatqna-tei - - name: GUARDRAIL_SERVICE_HOST_IP - value: chatqna-guardrails-usvc - - name: GUARDRAIL_SERVICE_PORT - value: "9090" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna-guardrails:latest" - imagePullPolicy: Always - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: 
chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml deleted file mode 100644 index 805707dc49..0000000000 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml +++ /dev/null @@ -1,1067 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "insert-your-remote-embedding-endpoint" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" ---- -# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml -# Copyright (C) 
2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-embedding-usvc-config - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "insert-your-remote-embedding-endpoint" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" ---- -# Source: chatqna/charts/llm-uservice/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-llm-uservice-config - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint" - LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct" - MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" ---- -# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-reranking-usvc-config - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_RERANKING_ENDPOINT: "insert-your-remote-reranking-endpoint" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" ---- -# Source: 
chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "insert-your-remote-embedding-endpoint" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" - CLIENTID: "" - CLIENT_SECRET: "" - TOKEN_URL: "" - ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header 
X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: 5173 - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/embedding-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service 
-metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6000 - targetPort: 6000 - protocol: TCP - name: embedding-usvc - selector: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/llm-uservice/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - name: llm-uservice - selector: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/reranking-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: 
reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8000 - targetPort: 8000 - protocol: TCP - name: reranking-usvc - selector: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - 
ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-embedding-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/embedding-tei:latest" - imagePullPolicy: Always - ports: - - name: embedding-usvc - containerPort: 6000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - 
periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/llm-uservice/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-llm-uservice-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/llm-vllm:latest" - imagePullPolicy: Always - ports: - - name: llm-uservice - containerPort: 9000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: 
- labels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-reranking-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/reranking-tei:latest" - imagePullPolicy: Always - ports: - - name: reranking-usvc - containerPort: 8000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: 
true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVICE_HOST_IP - value: chatqna-llm-uservice - - name: RERANK_SERVICE_HOST_IP - value: chatqna-reranking-usvc - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVICE_HOST_IP - value: chatqna-embedding-usvc - - name: MODEL_ID - value: "meta-llama/Meta-Llama-3.1-8B-Instruct" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: 
"opea/chatqna-wrapper:latest" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume ---- -# Source: chatqna/charts/tei/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/tei/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/templates/customMetrics.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml deleted 
file mode 100644 index 715db8976f..0000000000 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm.yaml +++ /dev/null @@ -1,1454 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-embedding-usvc-config - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/llm-uservice/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-llm-uservice-config - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - 
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" - vLLM_ENDPOINT: "http://chatqna-vllm" - LLM_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct" - MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct" ---- -# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-reranking-usvc-config - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: 
"cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-vllm-config - labels: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" -data: - MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" - OMPI_MCA_btl_vader_single_copy_mechanism: "none" - HF_HOME: "/tmp/.cache/huggingface" - GPU_MEMORY_UTILIZATION: "0.5" - DTYPE: "auto" - TENSOR_PARALLEL_SIZE: "1" - BLOCK_SIZE: "128" - MAX_NUM_SEQS: "256" - MAX_SEQ_LEN_TO_CAPTURE: "2048" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - 
proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: 5173 
- protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/embedding-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6000 - targetPort: 6000 - protocol: TCP - name: embedding-usvc - selector: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/llm-uservice/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9000 - targetPort: 9000 - protocol: TCP - name: llm-uservice - selector: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna ---- -# Source: 
chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/reranking-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8000 - targetPort: 8000 - protocol: TCP - name: reranking-usvc - selector: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright 
(C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-vllm - labels: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: vllm - selector: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: 
chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: 
chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-embedding-usvc - labels: - helm.sh/chart: embedding-usvc-1.0.0 - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: embedding-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-embedding-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - 
runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/embedding-tei:latest" - imagePullPolicy: Always - ports: - - name: embedding-usvc - containerPort: 6000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/llm-uservice/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-llm-uservice - labels: - helm.sh/chart: llm-uservice-1.0.0 - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: llm-uservice - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-llm-uservice-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/llm-vllm:latest" - imagePullPolicy: Always - ports: - - name: llm-uservice - containerPort: 9000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: llm-uservice - 
initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: llm-uservice - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml 
-# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-reranking-usvc - labels: - helm.sh/chart: reranking-usvc-1.0.0 - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: reranking-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-reranking-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/reranking-tei:latest" - imagePullPolicy: Always - ports: - - name: reranking-usvc - containerPort: 8000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: reranking-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - 
app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-0.8.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - 
envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - {} - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: IfNotPresent - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - - volumes: - - name: model-volume # Replace with Persistent volume claim/ host directory - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:1.5.0" - imagePullPolicy: IfNotPresent - 
volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume # Replace with Persistent volume claim/ host directory - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-vllm - labels: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: vllm - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: vllm - envFrom: - - configMapRef: - name: chatqna-vllm-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/vllm-gaudi:latest" - args: - - "--enforce-eager" - - "--model" - - "$(MODEL_ID)" - - "--tensor-parallel-size" - - "1" - - "--gpu-memory-utilization" - - "$(GPU_MEMORY_UTILIZATION)" - - "--dtype" - - "$(DTYPE)" - - "--max-num-seqs" - - "$(MAX_NUM_SEQS)" - - "--block-size" - - "$(BLOCK_SIZE)" - - "--max-seq-len-to-capture" - - 
"$(MAX_SEQ_LEN_TO_CAPTURE)" - - "--host" - - "0.0.0.0" - - "--port" - - "$(PORT)" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume # Replace with Persistent volume claim/ host directory - emptyDir: {} - - name: tmp - emptyDir: {} - ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVICE_HOST_IP - value: chatqna-llm-uservice - - name: RERANK_SERVICE_HOST_IP - value: chatqna-reranking-usvc - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVICE_HOST_IP - value: chatqna-embedding-usvc - - name: MODEL_ID - value: "meta-llama/Meta-Llama-3-8B-Instruct" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna-wrapper:latest" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume ---- -# Source: chatqna/charts/tei/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/tei/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/templates/customMetrics.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 diff --git a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml b/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml deleted file mode 100644 index 7c31d09d67..0000000000 --- a/ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna.yaml +++ /dev/null @@ -1,1101 +0,0 @@ ---- -# Source: chatqna/charts/data-prep/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 
-kind: ConfigMap -metadata: - name: chatqna-data-prep-config - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - KEY_INDEX_NAME: "file-keys" - SEARCH_BATCH_SIZE: "10" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" - LOGFLAG: "" ---- -# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-retriever-usvc-config - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -data: - TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei" - EMBED_MODEL: "" - REDIS_URL: "redis://chatqna-redis-vector-db:6379" - INDEX_NAME: "rag-redis" - EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - LOGFLAG: "" ---- -# Source: chatqna/charts/tei/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tei-config - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-base-en-v1.5" - PORT: "2081" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: 
"/tmp/.cache/huggingface" - MAX_WARMUP_SEQUENCE_LENGTH: "512" ---- -# Source: chatqna/charts/teirerank/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-teirerank-config - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "BAAI/bge-reranker-base" - PORT: "2082" - http_proxy: "" - https_proxy: "" - no_proxy: "" - NUMBA_CACHE_DIR: "/tmp" - TRANSFORMERS_CACHE: "/tmp/transformers_cache" - HF_HOME: "/tmp/.cache/huggingface" ---- -# Source: chatqna/charts/tgi/templates/configmap.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: chatqna-tgi-config - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -data: - MODEL_ID: "Intel/neural-chat-7b-v3-3" - PORT: "2080" - HF_TOKEN: "insert-your-huggingface-token-here" - http_proxy: "" - https_proxy: "" - no_proxy: "" - HABANA_LOGS: "/tmp/habana_logs" - NUMBA_CACHE_DIR: "/tmp" - HF_HOME: "/tmp/.cache/huggingface" - MAX_INPUT_LENGTH: "1024" - MAX_TOTAL_TOKENS: "2048" ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -data: - default.conf: |+ - # Copyright (C) 2024 Intel Corporation - # SPDX-License-Identifier: Apache-2.0 - - - server { - listen 80; - listen [::]:80; - - proxy_connect_timeout 600; - proxy_send_timeout 600; - proxy_read_timeout 600; - send_timeout 600; - - client_max_body_size 10G; - - location /home { - alias /usr/share/nginx/html/index.html; - } - - location / { - proxy_pass http://chatqna-chatqna-ui:5173; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/chatqna { - proxy_pass http://chatqna:8888; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/get_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - location /v1/dataprep/delete_file { - proxy_pass http://chatqna-data-prep:6007; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - } - -kind: ConfigMap -metadata: - name: chatqna-nginx-config ---- -# Source: chatqna/charts/chatqna-ui/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 5173 - targetPort: ui - protocol: TCP - name: ui - selector: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/data-prep/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-data-prep - labels: - 
helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6007 - targetPort: 6007 - protocol: TCP - name: data-prep - selector: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/redis-vector-db/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 6379 - targetPort: 6379 - protocol: TCP - name: redis-service - - port: 8001 - targetPort: 8001 - protocol: TCP - name: redis-insight - selector: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/retriever-usvc/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 7000 - targetPort: 7000 - protocol: TCP - name: retriever-usvc - selector: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tei/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2081 - protocol: TCP - name: tei - selector: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/teirerank/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2082 - protocol: TCP - name: teirerank - selector: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/charts/tgi/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 2080 - protocol: TCP - name: tgi - selector: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: v1 -kind: Service -metadata: - name: chatqna-nginx -spec: - ports: - - port: 80 - protocol: TCP - targetPort: 80 - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - type: NodePort ---- -# Source: chatqna/templates/service.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: Service -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - 
app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8888 - targetPort: 8888 - protocol: TCP - name: chatqna - selector: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna ---- -# Source: chatqna/charts/chatqna-ui/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-chatqna-ui - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - helm.sh/chart: chatqna-ui-1.0.0 - app.kubernetes.io/name: chatqna-ui - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - spec: - securityContext: - {} - containers: - - name: chatqna-ui - securityContext: - {} - image: "opea/chatqna-ui:latest" - imagePullPolicy: Always - ports: - - name: ui - containerPort: 5173 - protocol: TCP - resources: - {} - volumeMounts: - - mountPath: /tmp - name: tmp - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/data-prep/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-data-prep - labels: - helm.sh/chart: data-prep-1.0.0 - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: data-prep - app.kubernetes.io/instance: chatqna - spec: - 
securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-data-prep-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/dataprep-redis:latest" - imagePullPolicy: Always - ports: - - name: data-prep - containerPort: 6007 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: v1/health_check - port: data-prep - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-redis-vector-db - labels: - helm.sh/chart: redis-vector-db-1.0.0 - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "7.2.0-v9" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: redis-vector-db - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: redis-vector-db - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "redis/redis-stack:7.2.0-v9" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: 
data-volume - - mountPath: /redisinsight - name: redisinsight-volume - - mountPath: /tmp - name: tmp - ports: - - name: redis-service - containerPort: 6379 - protocol: TCP - - name: redis-insight - containerPort: 8001 - protocol: TCP - startupProbe: - tcpSocket: - port: 6379 # Probe the Redis port - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 120 - resources: - {} - volumes: - - name: data-volume - emptyDir: {} - - name: redisinsight-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-retriever-usvc - labels: - helm.sh/chart: retriever-usvc-1.0.0 - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: retriever-usvc - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: chatqna - envFrom: - - configMapRef: - name: chatqna-retriever-usvc-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/retriever-redis:latest" - imagePullPolicy: Always - ports: - - name: retriever-usvc - containerPort: 7000 - protocol: TCP - volumeMounts: - - mountPath: /tmp - name: tmp - livenessProbe: - failureThreshold: 24 - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - 
httpGet: - path: v1/health_check - port: retriever-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tei/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tei - labels: - helm.sh/chart: tei-1.0.0 - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tei - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tei - envFrom: - - configMapRef: - name: chatqna-tei-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: false - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/tei-gaudi:1.5.0" - imagePullPolicy: IfNotPresent - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2081 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# 
Source: chatqna/charts/teirerank/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-teirerank - labels: - helm.sh/chart: teirerank-1.0.0 - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "cpu-1.5" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: teirerank - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: teirerank - envFrom: - - configMapRef: - name: chatqna-teirerank-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" - imagePullPolicy: Always - args: - - "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2082 - protocol: TCP - livenessProbe: - failureThreshold: 24 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - readinessProbe: - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - startupProbe: - failureThreshold: 120 - httpGet: - path: /health - port: http - initialDelaySeconds: 5 - periodSeconds: 5 - resources: - {} - volumes: - - name: model-volume - emptyDir: {} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - - name: tmp - emptyDir: {} ---- -# Source: chatqna/charts/tgi/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-tgi - labels: - helm.sh/chart: tgi-1.0.0 - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "2.1.0" - app.kubernetes.io/managed-by: Helm -spec: - # use explicit replica counts only of HorizontalPodAutoscaler is disabled - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: tgi - app.kubernetes.io/instance: chatqna - spec: - securityContext: - {} - containers: - - name: tgi - envFrom: - - configMapRef: - name: chatqna-tgi-config - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "ghcr.io/huggingface/tgi-gaudi:2.0.6" - imagePullPolicy: Always - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /tmp - name: tmp - ports: - - name: http - containerPort: 2080 - protocol: TCP - livenessProbe: - failureThreshold: 24 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - startupProbe: - failureThreshold: 120 - initialDelaySeconds: 5 - periodSeconds: 5 - tcpSocket: - port: http - resources: - limits: - habana.ai/gaudi: 1 - volumes: - - name: model-volume - emptyDir: {} - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/deployment.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: chatqna - 
app.kubernetes.io/instance: chatqna - app: chatqna - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna - spec: - securityContext: - null - containers: - - name: chatqna - env: - - name: LLM_SERVER_HOST_IP - value: chatqna-tgi - - name: RERANK_SERVER_HOST_IP - value: chatqna-teirerank - - name: RETRIEVER_SERVICE_HOST_IP - value: chatqna-retriever-usvc - - name: EMBEDDING_SERVER_HOST_IP - value: chatqna-tei - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - image: "opea/chatqna:latest" - imagePullPolicy: Always - volumeMounts: - - mountPath: /tmp - name: tmp - ports: - - name: chatqna - containerPort: 8888 - protocol: TCP - resources: - null - volumes: - - name: tmp - emptyDir: {} ---- -# Source: chatqna/templates/nginx-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-nginx - labels: - helm.sh/chart: chatqna-1.0.0 - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app.kubernetes.io/version: "v1.0" - app.kubernetes.io/managed-by: Helm - app: chatqna-nginx -spec: - selector: - matchLabels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - template: - metadata: - labels: - app.kubernetes.io/name: chatqna - app.kubernetes.io/instance: chatqna - app: chatqna-nginx - spec: - containers: - - image: nginx:1.27.1 - imagePullPolicy: Always - name: nginx - volumeMounts: - - mountPath: /etc/nginx/conf.d - name: nginx-config-volume - securityContext: {} - volumes: - - configMap: - defaultMode: 420 - name: chatqna-nginx-config - name: nginx-config-volume ---- -# Source: chatqna/charts/tei/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/tei/templates/servicemonitor.yaml 
-# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/horizontalPodAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/teirerank/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/tgi/templates/horizontalPorAutoscaler.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 ---- -# Source: chatqna/charts/tgi/templates/servicemonitor.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -# Dashboard for the exposed TGI metrics: -# - https://grafana.com/grafana/dashboards/19831-text-generation-inference-dashboard/ -# Metric descriptions: -# - https://github.com/huggingface/text-generation-inference/discussions/1127#discussioncomment-7240527 ---- -# Source: chatqna/templates/customMetrics.yaml -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0