From b80482c81d3ea8c9826765ccdcc7153dbda32ae3 Mon Sep 17 00:00:00 2001
From: Trevor Royer
Date: Mon, 22 Apr 2024 16:36:50 -0600
Subject: [PATCH] add health check objects for OpenShift AI

---
Notes (ignored by git am):

Each patch appends one entry to /spec/resourceHealthChecks of the ArgoCD
resource. All three checks report "Progressing" while the object has no
status or no status.conditions yet.

- DataScienceCluster: "Healthy" when an Available=True condition exists and
  no condition other than Progressing/Degraded has status "False";
  otherwise "Progressing" when Progressing=True; otherwise "Degraded".
- DataSciencePipelinesApplication and Notebook: "Progressing" while any
  condition has status "False", otherwise "Healthy". Neither check sets
  "Degraded" today.

A missing condition.reason or condition.message is rendered as an empty
string instead of raising a Lua concatenation error. The blob hashes on the
"index" lines were not regenerated after these edits.

 .../health-check-openshift-ai/README.md       | 31 ++++++++++
 .../kustomization.yaml                        | 15 +++++
 ...patch-datasciencecluster-health-check.yaml | 56 +++++++++++++++++++
 ...iencepipelineapplication-health-check.yaml | 42 ++++++++++++++
 .../patch-notebook-health-check.yaml          | 42 ++++++++++++++
 5 files changed, 186 insertions(+)
 create mode 100644 openshift-gitops-operator/instance/components/health-check-openshift-ai/README.md
 create mode 100644 openshift-gitops-operator/instance/components/health-check-openshift-ai/kustomization.yaml
 create mode 100644 openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencecluster-health-check.yaml
 create mode 100644 openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencepipelineapplication-health-check.yaml
 create mode 100644 openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-notebook-health-check.yaml

diff --git a/openshift-gitops-operator/instance/components/health-check-openshift-ai/README.md b/openshift-gitops-operator/instance/components/health-check-openshift-ai/README.md
new file mode 100644
index 00000000..0b88da91
--- /dev/null
+++ b/openshift-gitops-operator/instance/components/health-check-openshift-ai/README.md
@@ -0,0 +1,31 @@
+# health-check-openshift-ai
+
+## Purpose
+This component is designed to enable custom health checks for OpenShift AI.
+
+## Not Yet Implemented
+
+Several custom resources created by OpenShift AI have not yet been implemented with Health Checks. Those include the following:
+
+KServe/ModelMesh:
+
+- servingruntimes
+- inferenceservice
+- inferencegraph
+
+Distributed Compute
+
+## Usage
+
+This component can be added to a base by adding the `components` section to your overlay `kustomization.yaml` file:
+
+```
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base
+
+components:
+  - ../../components/health-check-openshift-ai
+```
diff --git a/openshift-gitops-operator/instance/components/health-check-openshift-ai/kustomization.yaml b/openshift-gitops-operator/instance/components/health-check-openshift-ai/kustomization.yaml
new file mode 100644
index 00000000..10470006
--- /dev/null
+++ b/openshift-gitops-operator/instance/components/health-check-openshift-ai/kustomization.yaml
@@ -0,0 +1,15 @@
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+patches:
+  - path: patch-datasciencecluster-health-check.yaml
+    target:
+      kind: ArgoCD
+  - path: patch-datasciencepipelineapplication-health-check.yaml
+    target:
+      kind: ArgoCD
+  - path: patch-notebook-health-check.yaml
+    target:
+      kind: ArgoCD
+
+
diff --git a/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencecluster-health-check.yaml b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencecluster-health-check.yaml
new file mode 100644
index 00000000..75c07068
--- /dev/null
+++ b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencecluster-health-check.yaml
@@ -0,0 +1,56 @@
+- op: add
+  path: /spec/resourceHealthChecks/-
+  value:
+    group: datasciencecluster.opendatahub.io
+    kind: DataScienceCluster
+    check: |
+      local health_status = {}
+      if obj.status ~= nil then
+        if obj.status.conditions ~= nil then
+          local msg = ""
+          local componentsDegraded = 0
+          local available = false
+          local progressing = false
+          local degraded = false
+          for i, condition in pairs(obj.status.conditions) do
+
+            if condition.type == "Available" and condition.status == "True" then
+              available = true
+            elseif condition.type == "Progressing" then
+              if condition.status == "True" then
+                progressing = true
+                msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. (condition.reason or "") .. " | " .. (condition.message or "") .. "\n"
+              end
+            elseif condition.type == "Degraded" then
+              if condition.status == "True" then
+                degraded = true
+                msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. (condition.reason or "") .. " | " .. (condition.message or "") .. "\n"
+              end
+            elseif condition.status == "False" then
+              componentsDegraded = componentsDegraded + 1
+              msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. (condition.reason or "") .. " | " .. (condition.message or "") .. "\n"
+            end
+
+          end
+
+          if available == true and componentsDegraded == 0 then
+            health_status.status = "Healthy"
+          elseif progressing == true then
+            health_status.status = "Progressing"
+          elseif degraded == true or componentsDegraded > 0 then
+            health_status.status = "Degraded"
+          else
+            health_status.status = "Degraded"
+          end
+
+          health_status.message = msg
+        else
+          health_status.status = "Progressing"
+          health_status.message = "DataScienceCluster is creating..."
+        end
+      else
+        health_status.status = "Progressing"
+        health_status.message = "DataScienceCluster is creating..."
+      end
+
+      return health_status
diff --git a/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencepipelineapplication-health-check.yaml b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencepipelineapplication-health-check.yaml
new file mode 100644
index 00000000..e8ad1371
--- /dev/null
+++ b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-datasciencepipelineapplication-health-check.yaml
@@ -0,0 +1,42 @@
+- op: add
+  path: /spec/resourceHealthChecks/-
+  value:
+    group: datasciencepipelinesapplications.opendatahub.io
+    kind: DataSciencePipelinesApplication
+    check: |
+      local health_status = {}
+      if obj.status ~= nil then
+        if obj.status.conditions ~= nil then
+          local msg = ""
+          local progressing = false
+          local degraded = false
+          for i, condition in pairs(obj.status.conditions) do
+
+            if condition.status == "False" then
+              progressing = true
+              msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. (condition.reason or "") .. " | " .. (condition.message or "") .. "\n"
+            end
+
+          end
+
+          if progressing == false and degraded == false then
+            health_status.status = "Healthy"
+          elseif progressing == true then
+            health_status.status = "Progressing"
+          else
+            -- no condition distinguishes a degraded object from a progressing one, so `degraded`
+            -- is never set to true above; this branch only takes effect once such a condition exists
+            health_status.status = "Degraded"
+          end
+
+          health_status.message = msg
+        else
+          health_status.status = "Progressing"
+          health_status.message = "DataSciencePipelineApplication is creating..."
+        end
+      else
+        health_status.status = "Progressing"
+        health_status.message = "DataSciencePipelineApplication is creating..."
+      end
+
+      return health_status
diff --git a/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-notebook-health-check.yaml b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-notebook-health-check.yaml
new file mode 100644
index 00000000..3051e8c5
--- /dev/null
+++ b/openshift-gitops-operator/instance/components/health-check-openshift-ai/patch-notebook-health-check.yaml
@@ -0,0 +1,42 @@
+- op: add
+  path: /spec/resourceHealthChecks/-
+  value:
+    group: kubeflow.org
+    kind: Notebook
+    check: |
+      local health_status = {}
+      if obj.status ~= nil then
+        if obj.status.conditions ~= nil then
+          local msg = ""
+          local progressing = false
+          local degraded = false
+          for i, condition in pairs(obj.status.conditions) do
+
+            if condition.status == "False" then
+              progressing = true
+              msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. " | " .. (condition.reason or "") .. " | " .. (condition.message or "") .. "\n"
+            end
+
+          end
+
+          if progressing == false and degraded == false then
+            health_status.status = "Healthy"
+          elseif progressing == true then
+            health_status.status = "Progressing"
+          else
+            -- no condition distinguishes a degraded object from a progressing one, so `degraded`
+            -- is never set to true above; this branch only takes effect once such a condition exists
+            health_status.status = "Degraded"
+          end
+
+          health_status.message = msg
+        else
+          health_status.status = "Progressing"
+          health_status.message = "Notebook is creating..."
+        end
+      else
+        health_status.status = "Progressing"
+        health_status.message = "Notebook is creating..."
+      end
+
+      return health_status