From 354bf9ebde950a5147bf1a72e68c2320a68602f5 Mon Sep 17 00:00:00 2001 From: Christian Zunker <827818+czunker@users.noreply.github.com> Date: Thu, 16 Nov 2023 17:30:12 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Report=20on=20OOMkilled=20status=20?= =?UTF-8?q?(#928)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Report on OOMkilled status Signed-off-by: Christian Zunker --- api/v1alpha2/mondooauditconfig_types.go | 4 + api/v1alpha2/zz_generated.deepcopy.go | 5 + .../k8s.mondoo.com_mondooauditconfigs.yaml | 10 ++ controllers/admission/conditions.go | 17 +- controllers/admission/deployment_handler.go | 27 +++- controllers/container_image/conditions.go | 23 ++- .../container_image/deployment_handler.go | 20 ++- .../deployment_handler_test.go | 6 +- .../integration/integration_controller.go | 2 +- controllers/k8s_scan/conditions.go | 23 ++- controllers/k8s_scan/deployment_handler.go | 18 ++- .../k8s_scan/deployment_handler_test.go | 8 +- controllers/mondooauditconfig_controller.go | 96 +++++++++++ .../mondooauditconfig_controller_test.go | 101 ++++++++++++ controllers/nodes/conditions.go | 19 ++- controllers/nodes/deployment_handler.go | 18 ++- controllers/nodes/deployment_handler_test.go | 94 +++++++++++ controllers/scanapi/conditions.go | 17 +- controllers/scanapi/deployment_handler.go | 17 +- .../scanapi/deployment_handler_test.go | 76 +++++++++ controllers/status/operator_status.go | 65 +++++++- controllers/status/operator_status_test.go | 149 +++++++++++++++++- controllers/status/status_reporter.go | 2 +- pkg/utils/mondoo/condition.go | 6 + 24 files changed, 772 insertions(+), 51 deletions(-) diff --git a/api/v1alpha2/mondooauditconfig_types.go b/api/v1alpha2/mondooauditconfig_types.go index 925512107..cfafa457d 100644 --- a/api/v1alpha2/mondooauditconfig_types.go +++ b/api/v1alpha2/mondooauditconfig_types.go @@ -179,6 +179,10 @@ type MondooAuditConfigCondition struct { Reason string `json:"reason,omitempty"` // Message is a human-readable message indicating details about the last transition Message string `json:"message,omitempty"` + // AffectedPods, when filled, contains a list of Pods which are affected by an issue + AffectedPods []string `json:"affectedPods,omitempty"` + // MemoryLimit contains the currently active memory limit for a Pod + MemoryLimit string `json:"memoryLimit,omitempty"` } // MondooOperatorConfigConditionType is a valid value for MondooOperatorConfig.Status.Condition[].Type diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index 05b1796bf..fd4c793f1 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -206,6 +206,11 @@ func (in *MondooAuditConfigCondition) DeepCopyInto(out *MondooAuditConfigConditi *out = *in in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime) in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + if in.AffectedPods != nil { + in, out := &in.AffectedPods, &out.AffectedPods + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MondooAuditConfigCondition.
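Every controller touched by this patch relies on the same detection rule: a scan container that was OOM-killed reappears with exit code 137 in its `LastTerminationState`, and the condition helper then records the Pod name in `AffectedPods` and the first container's memory limit in `MemoryLimit`. A minimal, self-contained sketch of that shared check, assuming only the upstream `k8s.io/api` types (the helper name `oomKilledPods` is illustrative, not part of this patch):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// oomKilledPods mirrors the check each condition helper in this patch performs:
// a container whose last termination state reports exit code 137 (SIGKILL,
// used by the kernel OOM killer) marks its Pod as affected.
func oomKilledPods(pods *corev1.PodList) (affected []string, memoryLimit string) {
	for _, pod := range pods.Items {
		for _, cs := range pod.Status.ContainerStatuses {
			if cs.LastTerminationState.Terminated != nil && cs.LastTerminationState.Terminated.ExitCode == 137 {
				affected = append(affected, pod.Name)
				if len(pod.Spec.Containers) > 0 {
					// Like the patch, report the memory limit of the first container.
					memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String()
				}
				break
			}
		}
	}
	return affected, memoryLimit
}

func main() {
	// In the controllers below, the list is populated via KubeClient.List
	// with the CronJob's or Deployment's label selector.
	affected, limit := oomKilledPods(&corev1.PodList{})
	fmt.Println(affected, limit)
}
```

Exit code 137 is how the kubelet surfaces a SIGKILL from the kernel OOM killer, which is why all of the condition helpers below key on it.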
diff --git a/config/crd/bases/k8s.mondoo.com_mondooauditconfigs.yaml b/config/crd/bases/k8s.mondoo.com_mondooauditconfigs.yaml index 4e8e0b40a..711a6dbc6 100644 --- a/config/crd/bases/k8s.mondoo.com_mondooauditconfigs.yaml +++ b/config/crd/bases/k8s.mondoo.com_mondooauditconfigs.yaml @@ -478,6 +478,12 @@ spec: description: Conditions includes detailed status for the MondooAuditConfig items: properties: + affectedPods: + description: AffectedPods, when filled, contains a list of Pods + which are affected by an issue + items: + type: string + type: array lastTransitionTime: description: LastTransitionTime is the last time the condition transitioned from one status to another. format: date-time type: string lastUpdateTime: description: LastUpdateTime is the last time we probed the condition format: date-time type: string + memoryLimit: + description: MemoryLimit contains the currently active memory + limit for a Pod + type: string message: description: Message is a human-readable message indicating details about the last transition diff --git a/controllers/admission/conditions.go b/controllers/admission/conditions.go index 26005f864..25acae973 100644 --- a/controllers/admission/conditions.go +++ b/controllers/admission/conditions.go @@ -9,17 +9,30 @@ import ( corev1 "k8s.io/api/core/v1" ) -func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool) { +func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) { msg := "Admission controller is available" reason := "AdmissionAvailable" status := corev1.ConditionFalse updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" if !config.Spec.Admission.Enable { msg = "Admission controller is disabled" reason = "AdmissionDisabled" status = corev1.ConditionFalse } else if degradedStatus { msg = "Admission controller is unavailable" + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 { + // TODO: double check container name? + msg = "Admission controller is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String() + break + } + } + } reason = "AdmissionUnvailable" status = corev1.ConditionTrue condition := mondoo.FindMondooAuditConditions(config.Status.Conditions, mondoov1alpha2.ScanAPIDegraded) @@ -28,5 +41,5 @@ func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degrade } } - config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.AdmissionDegraded, status, reason, msg, updateCheck) + config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.AdmissionDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) } diff --git a/controllers/admission/deployment_handler.go b/controllers/admission/deployment_handler.go index 96f36d06c..6d1a40899 100644 --- a/controllers/admission/deployment_handler.go +++ b/controllers/admission/deployment_handler.go @@ -238,8 +238,8 @@ func (n *DeploymentHandler) syncWebhookDeployment(ctx context.Context) error { webhookLog.V(3).Info("Webhook deployment is only scaled to 1 replica, but the webhook mode is set to 'enforcing'. This might be problematic if the API server is not able to connect to the webhook.
Please consider increasing the replicas.") } - deployment := &appsv1.Deployment{} - created, err := k8s.CreateIfNotExist(ctx, n.KubeClient, deployment, desiredDeployment) + existingDeployment := &appsv1.Deployment{} + created, err := k8s.CreateIfNotExist(ctx, n.KubeClient, existingDeployment, desiredDeployment) if err != nil { webhookLog.Error(err, "failed to create Deployment for webhook") return err @@ -250,18 +250,31 @@ func (n *DeploymentHandler) syncWebhookDeployment(ctx context.Context) error { return nil } - updateAdmissionConditions(n.Mondoo, n.isWebhookDegraded(deployment)) + // Get Pods for this deployment + selector, _ := metav1.LabelSelectorAsSelector(existingDeployment.Spec.Selector) + opts := []client.ListOption{ + client.InNamespace(existingDeployment.Namespace), + client.MatchingLabelsSelector{Selector: selector}, + } + pods := &corev1.PodList{} + err = n.KubeClient.List(ctx, pods, opts...) + if err != nil { + webhookLog.Error(err, "Failed to list Pods for Admission controller") + return err + } + + updateAdmissionConditions(n.Mondoo, n.isWebhookDegraded(existingDeployment), pods) // Not a full check for whether someone has modified our Deployment, but checking for some important bits so we know // if an Update() is needed. - if !k8s.AreDeploymentsEqual(*deployment, *desiredDeployment) { + if !k8s.AreDeploymentsEqual(*existingDeployment, *desiredDeployment) { // Note: changes to the labels/selector labels means we can't Update() the // Deployment, so we'll do a delete/create instead. - if err := k8s.DeleteIfExists(ctx, n.KubeClient, deployment); err != nil { + if err := k8s.DeleteIfExists(ctx, n.KubeClient, existingDeployment); err != nil { webhookLog.Error(err, "failed to delete exising webhook Deployment") return err } - if _, err := k8s.CreateIfNotExist(ctx, n.KubeClient, deployment, desiredDeployment); err != nil { + if _, err := k8s.CreateIfNotExist(ctx, n.KubeClient, existingDeployment, desiredDeployment); err != nil { webhookLog.Error(err, "failed to replace exising webhook Deployment") return err } @@ -526,7 +539,7 @@ func (n *DeploymentHandler) down(ctx context.Context) (ctrl.Result, error) { } // Make sure to clear any degraded status - updateAdmissionConditions(n.Mondoo, false) + updateAdmissionConditions(n.Mondoo, false, &corev1.PodList{}) return ctrl.Result{}, nil } diff --git a/controllers/container_image/conditions.go b/controllers/container_image/conditions.go index e72aeabcc..078a34b5a 100644 --- a/controllers/container_image/conditions.go +++ b/controllers/container_image/conditions.go @@ -9,21 +9,36 @@ import ( corev1 "k8s.io/api/core/v1" ) -func updateImageScanningConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool) { - msg := "Kubernetes Container Image Scanning is Available" +func updateImageScanningConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) { + msg := "Kubernetes Container Image Scanning is available" reason := "KubernetesContainerImageScanningAvailable" status := corev1.ConditionFalse updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" if !config.Spec.KubernetesResources.ContainerImageScanning && !config.Spec.Containers.Enable { msg = "Kubernetes Container Image Scanning is disabled" reason = "KubernetesContainerImageScanningDisabled" status = corev1.ConditionFalse } else if degradedStatus { - msg = "Kubernetes Container Image Scanning is Unavailable" + msg = "Kubernetes Container Image Scanning is unavailable" reason = 
"KubernetesContainerImageScanningUnavailable" status = corev1.ConditionTrue } + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 { + // TODO: double check container name? + msg = "Kubernetes Container Image Scanning is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String() + reason = "KubernetesContainerImageScanningUnavailable" + status = corev1.ConditionTrue + } + } + } + config.Status.Conditions = mondoo.SetMondooAuditCondition( - config.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded, status, reason, msg, updateCheck) + config.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) } diff --git a/controllers/container_image/deployment_handler.go b/controllers/container_image/deployment_handler.go index 8ec0c56a7..b50977508 100644 --- a/controllers/container_image/deployment_handler.go +++ b/controllers/container_image/deployment_handler.go @@ -139,7 +139,23 @@ func (n *DeploymentHandler) syncCronJob(ctx context.Context) error { return err } - updateImageScanningConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs)) + // Get Pods for this CronJob + pods := &corev1.PodList{} + if len(cronJobs) > 0 { + lSelector := metav1.SetAsLabelSelector(CronJobLabels(*n.Mondoo)) + selector, _ := metav1.LabelSelectorAsSelector(lSelector) + opts := []client.ListOption{ + client.InNamespace(n.Mondoo.Namespace), + client.MatchingLabelsSelector{Selector: selector}, + } + err = n.KubeClient.List(ctx, pods, opts...) + if err != nil { + logger.Error(err, "Failed to list Pods for Kubernetes Container Image Scanning") + return err + } + } + + updateImageScanningConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs), pods) return nil } @@ -214,7 +230,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error { } // Clear any remnant status - updateImageScanningConditions(n.Mondoo, false) + updateImageScanningConditions(n.Mondoo, false, &corev1.PodList{}) return nil } diff --git a/controllers/container_image/deployment_handler_test.go b/controllers/container_image/deployment_handler_test.go index 97c0f09b3..f5bb3ded7 100644 --- a/controllers/container_image/deployment_handler_test.go +++ b/controllers/container_image/deployment_handler_test.go @@ -278,7 +278,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus() // Verify the image scanning status is set to available s.Equal(1, len(d.Mondoo.Status.Conditions)) condition := d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Container Image Scanning is Available", condition.Message) + s.Equal("Kubernetes Container Image Scanning is available", condition.Message) s.Equal("KubernetesContainerImageScanningAvailable", condition.Reason) s.Equal(corev1.ConditionFalse, condition.Status) @@ -300,7 +300,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus() // Verify the image scanning status is set to unavailable condition = d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Container Image Scanning is Unavailable", condition.Message) + s.Equal("Kubernetes Container Image Scanning is unavailable", condition.Message) s.Equal("KubernetesContainerImageScanningUnavailable", condition.Reason) s.Equal(corev1.ConditionTrue, condition.Status) @@ -316,7 +316,7 @@ 
func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus() // Verify the image scanning status is set to available condition = d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Container Image Scanning is Available", condition.Message) + s.Equal("Kubernetes Container Image Scanning is available", condition.Message) s.Equal("KubernetesContainerImageScanningAvailable", condition.Reason) s.Equal(corev1.ConditionFalse, condition.Status) diff --git a/controllers/integration/integration_controller.go b/controllers/integration/integration_controller.go index c72742fb6..993b1cf25 100644 --- a/controllers/integration/integration_controller.go +++ b/controllers/integration/integration_controller.go @@ -167,5 +167,5 @@ func updateIntegrationCondition(config *v1alpha2.MondooAuditConfig, degradedStat msg = customMessage } - config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, v1alpha2.MondooIntegrationDegraded, status, reason, msg, updateCheck) + config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, v1alpha2.MondooIntegrationDegraded, status, reason, msg, updateCheck, []string{}, "") } diff --git a/controllers/k8s_scan/conditions.go b/controllers/k8s_scan/conditions.go index 3742d5196..7f7ddeae4 100644 --- a/controllers/k8s_scan/conditions.go +++ b/controllers/k8s_scan/conditions.go @@ -9,21 +9,36 @@ import ( corev1 "k8s.io/api/core/v1" ) -func updateWorkloadsConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool) { - msg := "Kubernetes Resources Scanning is Available" +func updateWorkloadsConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) { + msg := "Kubernetes Resources Scanning is available" reason := "KubernetesResourcesScanningAvailable" status := corev1.ConditionFalse updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" if !config.Spec.KubernetesResources.Enable { msg = "Kubernetes Resources Scanning is disabled" reason = "KubernetesResourcesScanningDisabled" status = corev1.ConditionFalse } else if degradedStatus { - msg = "Kubernetes Resources Scanning is Unavailable" + msg = "Kubernetes Resources Scanning is unavailable" reason = "KubernetesResourcesScanningUnavailable" status = corev1.ConditionTrue } + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 { + // TODO: double check container name? 
+ msg = "Kubernetes Resources Scanning is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String() + reason = "KubernetesResourcesScanningUnavailable" + status = corev1.ConditionTrue + } + } + } + config.Status.Conditions = mondoo.SetMondooAuditCondition( - config.Status.Conditions, v1alpha2.K8sResourcesScanningDegraded, status, reason, msg, updateCheck) + config.Status.Conditions, v1alpha2.K8sResourcesScanningDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) } diff --git a/controllers/k8s_scan/deployment_handler.go b/controllers/k8s_scan/deployment_handler.go index 1f7ab637b..e4bfe3a4f 100644 --- a/controllers/k8s_scan/deployment_handler.go +++ b/controllers/k8s_scan/deployment_handler.go @@ -107,7 +107,21 @@ func (n *DeploymentHandler) syncCronJob(ctx context.Context) error { return err } - updateWorkloadsConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs)) + // Get Pods for this CronJob + pods := &corev1.PodList{} + if len(cronJobs) > 0 { + opts := &client.ListOptions{ + Namespace: n.Mondoo.Namespace, + LabelSelector: labels.SelectorFromSet(CronJobLabels(*n.Mondoo)), + } + err = n.KubeClient.List(ctx, pods, opts) + if err != nil { + logger.Error(err, "Failed to list Pods for scan Kubernetes Reosurce Scanning") + return err + } + } + + updateWorkloadsConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs), pods) return n.cleanupWorkloadDeployment(ctx) } @@ -137,7 +151,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error { } // Clear any remnant status - updateWorkloadsConditions(n.Mondoo, false) + updateWorkloadsConditions(n.Mondoo, false, &corev1.PodList{}) return nil } diff --git a/controllers/k8s_scan/deployment_handler_test.go b/controllers/k8s_scan/deployment_handler_test.go index 866358048..d3ae5cd50 100644 --- a/controllers/k8s_scan/deployment_handler_test.go +++ b/controllers/k8s_scan/deployment_handler_test.go @@ -218,7 +218,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() { // Verify container image scanning and kubernetes resources conditions s.Equal(1, len(d.Mondoo.Status.Conditions)) condition := d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Resources Scanning is Available", condition.Message) + s.Equal("Kubernetes Resources Scanning is available", condition.Message) s.Equal("KubernetesResourcesScanningAvailable", condition.Reason) s.Equal(corev1.ConditionFalse, condition.Status) @@ -240,7 +240,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() { // Verify the kubernetes resources status is set to unavailable condition = d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Resources Scanning is Unavailable", condition.Message) + s.Equal("Kubernetes Resources Scanning is unavailable", condition.Message) s.Equal("KubernetesResourcesScanningUnavailable", condition.Reason) s.Equal(corev1.ConditionTrue, condition.Status) @@ -256,7 +256,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() { // Verify the kubernetes resources scanning status is set to available condition = d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Resources Scanning is Available", condition.Message) + s.Equal("Kubernetes Resources Scanning is available", condition.Message) s.Equal("KubernetesResourcesScanningAvailable", condition.Reason) s.Equal(corev1.ConditionFalse, condition.Status) @@ -274,7 +274,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() { // 
Verify the kubernetes resources scanning status is set to available when there is an active scan condition = d.Mondoo.Status.Conditions[0] - s.Equal("Kubernetes Resources Scanning is Available", condition.Message) + s.Equal("Kubernetes Resources Scanning is available", condition.Message) s.Equal("KubernetesResourcesScanningAvailable", condition.Reason) s.Equal(corev1.ConditionFalse, condition.Status) diff --git a/controllers/mondooauditconfig_controller.go b/controllers/mondooauditconfig_controller.go index 69bfcf940..a6346e7c7 100644 --- a/controllers/mondooauditconfig_controller.go +++ b/controllers/mondooauditconfig_controller.go @@ -314,6 +314,98 @@ func (r *MondooAuditConfigReconciler) nodeEventsRequestMapper(ctx context.Contex return requests } +// cronJobPodsRequestMapper watches Pods created by our CronJobs. +// Without it we wouldn't be able to report OOM status on the spawned Pods +func (r *MondooAuditConfigReconciler) cronJobPodsRequestMapper(ctx context.Context, o client.Object) []reconcile.Request { + var requests []reconcile.Request + auditConfigs := &v1alpha2.MondooAuditConfigList{} + if err := r.Client.List(ctx, auditConfigs); err != nil { + logger := ctrllog.Log.WithName("cronjob-pod-watcher") + logger.Error(err, "Failed to list MondooAuditConfigs") + return requests + } + + for _, a := range auditConfigs.Items { + if a.Namespace == o.GetNamespace() { + podLabels := o.GetLabels() + isScanPod := isCronJobScanPod(a, podLabels) + if isScanPod { + return []reconcile.Request{{NamespacedName: client.ObjectKeyFromObject(&a)}} + } + } + } + return []reconcile.Request{} +} + +// isCronJobScanPod checks whether the provided podLabels belong to one of the Mondoo scan CronJobs +func isCronJobScanPod(a v1alpha2.MondooAuditConfig, podLabels map[string]string) bool { + isNodeScanPod := a.Spec.Nodes.Enable + isResourceScanPod := a.Spec.KubernetesResources.Enable + isImageScanPod := a.Spec.Containers.Enable + + // Check whether it is a Pod for node scanning + if a.Spec.Nodes.Enable { + nodeCronJobLabels := nodes.CronJobLabels(a) + // podLabels must include all of the labels of this type of CronJob + for k, v := range nodeCronJobLabels { + if val, ok := podLabels[k]; ok { + if val != v { + isNodeScanPod = false + break + } + } else { + isNodeScanPod = false + break + } + } + } + if isNodeScanPod { + return isNodeScanPod + } + + // Check whether it is a Pod for k8s resource scanning + if a.Spec.KubernetesResources.Enable { + resourceCronJobLabels := k8s_scan.CronJobLabels(a) + // podLabels must include all of the labels of this type of CronJob + for k, v := range resourceCronJobLabels { + if val, ok := podLabels[k]; ok { + if val != v { + isResourceScanPod = false + break + } + } else { + isResourceScanPod = false + break + } + } + } + if isResourceScanPod { + return isResourceScanPod + } + + // Check whether it is a Pod for container image scanning + if a.Spec.Containers.Enable { + imageCronJobLabels := container_image.CronJobLabels(a) + // podLabels must include all of the labels of this type of CronJob + for k, v := range imageCronJobLabels { + if val, ok := podLabels[k]; ok { + if val != v { + isImageScanPod = false + break + } + } else { + isImageScanPod = false + break + } + } + } + if isImageScanPod { + return isImageScanPod + } + + return false +} + func (r *MondooAuditConfigReconciler) exchangeTokenForServiceAccount(ctx context.Context, auditConfig *v1alpha2.MondooAuditConfig, cfg *v1alpha2.MondooOperatorConfig, log logr.Logger) error { if auditConfig.Spec.MondooCredsSecretRef.Name == "" { log.Info("MondooAuditConfig without
.spec.mondooCredsSecretRef defined") @@ -374,6 +466,10 @@ func (r *MondooAuditConfigReconciler) SetupWithManager(mgr ctrl.Manager) error { For(&v1alpha2.MondooAuditConfig{}). Owns(&batchv1.CronJob{}). Owns(&appsv1.Deployment{}). + Watches( + &corev1.Pod{}, + handler.EnqueueRequestsFromMapFunc(r.cronJobPodsRequestMapper), + builder.WithPredicates(k8s.CreateUpdateEventsPredicate{})). Watches( &corev1.Node{}, handler.EnqueueRequestsFromMapFunc(r.nodeEventsRequestMapper), diff --git a/controllers/mondooauditconfig_controller_test.go b/controllers/mondooauditconfig_controller_test.go index 877403b70..8a0d1567b 100644 --- a/controllers/mondooauditconfig_controller_test.go +++ b/controllers/mondooauditconfig_controller_test.go @@ -26,6 +26,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "go.mondoo.com/mondoo-operator/api/v1alpha2" + "go.mondoo.com/mondoo-operator/controllers/container_image" + "go.mondoo.com/mondoo-operator/controllers/k8s_scan" + "go.mondoo.com/mondoo-operator/controllers/nodes" "go.mondoo.com/mondoo-operator/controllers/resource_monitor/scan_api_store" "go.mondoo.com/mondoo-operator/controllers/status" "go.mondoo.com/mondoo-operator/pkg/client/mondooclient" @@ -446,3 +449,101 @@ func testIntegrationTokenSecret() *corev1.Secret { }, } } + +func TestIsCronJobScanPod(t *testing.T) { + a := v1alpha2.MondooAuditConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mondoo-client", + Namespace: "mondoo-operator", + }, + Spec: v1alpha2.MondooAuditConfigSpec{ + Nodes: v1alpha2.Nodes{ + Enable: true, + }, + KubernetesResources: v1alpha2.KubernetesResources{ + Enable: true, + }, + Containers: v1alpha2.Containers{ + Enable: true, + }, + }, + } + + nodeCronJobLabels := nodes.CronJobLabels(a) + resourceCronJobLabels := k8s_scan.CronJobLabels(a) + imageCronJobLabels := container_image.CronJobLabels(a) + + tests := []struct { + name string + podLabels map[string]string + wantResult bool + }{ + { + name: "node scan pod", + podLabels: map[string]string{ + "app": "mondoo", + "scan": "nodes", + "mondoo_cr": "mondoo-client", + "job-name": nodeCronJobLabels["job-name"], + }, + wantResult: true, + }, + { + name: "k8s resource scan pod", + podLabels: map[string]string{ + "app": "mondoo-k8s-scan", + "scan": "k8s", + "mondoo_cr": "mondoo-client", + "job-name": resourceCronJobLabels["job-name"], + }, + wantResult: true, + }, + { + name: "container image scan pod", + podLabels: map[string]string{ + "app": "mondoo-container-scan", + "scan": "k8s", + "mondoo_cr": "mondoo-client", + "job-name": imageCronJobLabels["job-name"], + }, + wantResult: true, + }, + { + name: "mondoo node scan pod missing label", + podLabels: map[string]string{ + "scan": "node", + "mondoo_cr": "mondoo-client", + "job-name": imageCronJobLabels["job-name"], + }, + wantResult: false, + }, + { + name: "non-mondoo node scan pod", + podLabels: map[string]string{ + "app": "not-mondoo", + "scan": "node", + "mondoo_cr": "mondoo-client", + "job-name": imageCronJobLabels["job-name"], + }, + wantResult: false, + }, + { + name: "invalid pod labels", + podLabels: map[string]string{ + "app": "mondoo", + "component": "invalid", + "job-name": "invalid", + }, + wantResult: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotResult := isCronJobScanPod(a, tt.podLabels) + if gotResult != tt.wantResult { + t.Errorf("isCronJobScanPod() = %v, want %v", gotResult, tt.wantResult) + } + }) + } +} diff --git a/controllers/nodes/conditions.go b/controllers/nodes/conditions.go index 13c956c2d..b541ba3ce 100644 --- 
a/controllers/nodes/conditions.go +++ b/controllers/nodes/conditions.go @@ -10,11 +10,13 @@ import ( corev1 "k8s.io/api/core/v1" ) -func updateNodeConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool) { +func updateNodeConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) { msg := "Node Scanning is available" reason := "NodeScanningAvailable" status := corev1.ConditionFalse updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" if !config.Spec.Nodes.Enable { msg = "Node Scanning is disabled" reason = "NodeScanningDisabled" @@ -25,6 +27,19 @@ func updateNodeConditions(config *v1alpha2.MondooAuditConfig, degradedStatus boo status = corev1.ConditionTrue } + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 { + // TODO: double check container name? + msg = "Node Scanning is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String() + reason = "NodeScanningUnavailable" + status = corev1.ConditionTrue + } + } + } + config.Status.Conditions = mondoo.SetMondooAuditCondition( - config.Status.Conditions, v1alpha2.NodeScanningDegraded, status, reason, msg, updateCheck) + config.Status.Conditions, v1alpha2.NodeScanningDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) } diff --git a/controllers/nodes/deployment_handler.go b/controllers/nodes/deployment_handler.go index 53e84bbd0..21ec53401 100644 --- a/controllers/nodes/deployment_handler.go +++ b/controllers/nodes/deployment_handler.go @@ -128,7 +128,21 @@ func (n *DeploymentHandler) syncCronJob(ctx context.Context) error { return err } - updateNodeConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs)) + // Get Pods for this CronJob + pods := &corev1.PodList{} + if len(cronJobs) > 0 { + opts := &client.ListOptions{ + Namespace: n.Mondoo.Namespace, + LabelSelector: labels.SelectorFromSet(CronJobLabels(*n.Mondoo)), + } + err = n.KubeClient.List(ctx, pods, opts) + if err != nil { + logger.Error(err, "Failed to list Pods for Node Scanning") + return err + } + } + + updateNodeConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs), pods) return nil } @@ -300,7 +314,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error { } // Update any remnant conditions - updateNodeConditions(n.Mondoo, false) + updateNodeConditions(n.Mondoo, false, &corev1.PodList{}) return nil } diff --git a/controllers/nodes/deployment_handler_test.go b/controllers/nodes/deployment_handler_test.go index 6560ff1fc..b0a3f438c 100644 --- a/controllers/nodes/deployment_handler_test.go +++ b/controllers/nodes/deployment_handler_test.go @@ -19,6 +19,7 @@ import ( "go.mondoo.com/mondoo-operator/tests/framework/utils" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" @@ -433,6 +434,99 @@ func (s *DeploymentHandlerSuite) TestReconcile_NodeScanningStatus() { s.Equal(corev1.ConditionFalse, condition.Status) } +func (s *DeploymentHandlerSuite) TestReconcile_NodeScanningOOMStatus() { + s.seedNodes() + d := s.createDeploymentHandler() + + // Reconcile to create all resources + result, err := d.Reconcile(s.ctx) + s.NoError(err) + 
s.True(result.IsZero()) + + // Verify the node scanning status is set to available + s.Equal(1, len(d.Mondoo.Status.Conditions)) + condition := d.Mondoo.Status.Conditions[0] + s.Equal("Node Scanning is available", condition.Message) + s.Equal("NodeScanningAvailable", condition.Reason) + s.Equal(corev1.ConditionFalse, condition.Status) + s.Len(condition.AffectedPods, 0) + + listOpts := &client.ListOptions{ + Namespace: s.auditConfig.Namespace, + LabelSelector: labels.SelectorFromSet(CronJobLabels(s.auditConfig)), + } + cronJobs := &batchv1.CronJobList{} + s.NoError(d.KubeClient.List(s.ctx, cronJobs, listOpts)) + + oomPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-scan-123", + Namespace: s.auditConfig.Namespace, + Labels: CronJobLabels(s.auditConfig), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "node-scan", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceMemory: *resource.NewQuantity(1, resource.BinarySI), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "node-scan", + LastTerminationState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 137, + }, + }, + }, + }, + }, + } + + err = d.KubeClient.Create(s.ctx, oomPod) + s.NoError(err) + + // Reconcile to update the audit config status + result, err = d.Reconcile(s.ctx) + s.NoError(err) + s.True(result.IsZero()) + + pods := &corev1.PodList{} + s.NoError(d.KubeClient.List(s.ctx, pods, listOpts)) + s.Equal(1, len(pods.Items)) + + // Verify the node scanning status is set to unavailable + condition = d.Mondoo.Status.Conditions[0] + s.Equal("Node Scanning is unavailable due to OOM", condition.Message) + s.Len(condition.AffectedPods, 1) + s.Contains(condition.AffectedPods, "node-scan-123") + containerMemory := pods.Items[0].Spec.Containers[0].Resources.Limits.Memory() + s.Equal(containerMemory.String(), condition.MemoryLimit) + s.Equal("NodeScanningUnavailable", condition.Reason) + s.Equal(corev1.ConditionTrue, condition.Status) + + err = d.KubeClient.Delete(s.ctx, &pods.Items[0]) + s.NoError(err) + result, err = d.Reconcile(s.ctx) + s.NoError(err) + s.True(result.IsZero()) + + // Verify the node scanning status is set to available again + s.Equal(1, len(d.Mondoo.Status.Conditions)) + condition = d.Mondoo.Status.Conditions[0] + s.Equal("Node Scanning is available", condition.Message) + s.Equal("NodeScanningAvailable", condition.Reason) + s.Equal(corev1.ConditionFalse, condition.Status) + s.Len(condition.AffectedPods, 0) +} + func (s *DeploymentHandlerSuite) TestReconcile_DisableNodeScanning() { s.seedNodes() d := s.createDeploymentHandler() diff --git a/controllers/scanapi/conditions.go b/controllers/scanapi/conditions.go index dd93a1705..7dd73c02d 100644 --- a/controllers/scanapi/conditions.go +++ b/controllers/scanapi/conditions.go @@ -12,11 +12,13 @@ import ( corev1 "k8s.io/api/core/v1" ) -func updateScanAPIConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool, conditions []appsv1.DeploymentCondition) { +func updateScanAPIConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool, conditions []appsv1.DeploymentCondition, pods *corev1.PodList) { msg := "ScanAPI controller is available" reason := "ScanAPIAvailable" status := corev1.ConditionFalse updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange + affectedPods := []string{} + memoryLimit := "" if !config.Spec.KubernetesResources.Enable && !config.Spec.Admission.Enable 
{ msg = "ScanAPI is disabled" reason = "ScanAPIDisabled" @@ -33,9 +35,20 @@ func updateScanAPIConditions(config *mondoov1alpha2.MondooAuditConfig, degradedS } } + for _, pod := range pods.Items { + for _, status := range pod.Status.ContainerStatuses { + if status.LastTerminationState.Terminated != nil && status.LastTerminationState.Terminated.ExitCode == 137 { + // TODO: double check container name? + msg = "ScanAPI controller is unavailable due to OOM" + affectedPods = append(affectedPods, pod.Name) + memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String() + } + } + } + reason = "ScanAPIUnvailable" status = corev1.ConditionTrue } - config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.ScanAPIDegraded, status, reason, msg, updateCheck) + config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.ScanAPIDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) } diff --git a/controllers/scanapi/deployment_handler.go b/controllers/scanapi/deployment_handler.go index 61632a55f..1a3bba9f0 100644 --- a/controllers/scanapi/deployment_handler.go +++ b/controllers/scanapi/deployment_handler.go @@ -65,7 +65,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error { } // Make sure to clear any degraded status - updateScanAPIConditions(n.Mondoo, false, []appsv1.DeploymentCondition{}) + updateScanAPIConditions(n.Mondoo, false, []appsv1.DeploymentCondition{}, &corev1.PodList{}) return nil } @@ -143,7 +143,20 @@ func (n *DeploymentHandler) syncDeployment(ctx context.Context) error { return nil } - updateScanAPIConditions(n.Mondoo, existingDeployment.Status.UnavailableReplicas != 0, existingDeployment.Status.Conditions) + // Get Pods for this deployment + selector, _ := metav1.LabelSelectorAsSelector(existingDeployment.Spec.Selector) + opts := []client.ListOption{ + client.InNamespace(existingDeployment.Namespace), + client.MatchingLabelsSelector{Selector: selector}, + } + pods := &corev1.PodList{} + err = n.KubeClient.List(ctx, pods, opts...) 
+ if err != nil { + logger.Error(err, "Failed to list Pods for scan API") + return err + } + + updateScanAPIConditions(n.Mondoo, existingDeployment.Status.UnavailableReplicas != 0, existingDeployment.Status.Conditions, pods) if !k8s.AreDeploymentsEqual(*deployment, existingDeployment) { logger.Info("Update needed for scan API Deployment") diff --git a/controllers/scanapi/deployment_handler_test.go b/controllers/scanapi/deployment_handler_test.go index fcf9b8b88..778a53135 100644 --- a/controllers/scanapi/deployment_handler_test.go +++ b/controllers/scanapi/deployment_handler_test.go @@ -19,6 +19,7 @@ import ( fakeMondoo "go.mondoo.com/mondoo-operator/pkg/utils/mondoo/fake" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -353,6 +354,81 @@ func (s *DeploymentHandlerSuite) TestDeploy_CreateMissingServiceAccount() { s.Assertions.Truef(foundMissingServiceAccountCondition, "No Condition for missing service account found") } +func (s *DeploymentHandlerSuite) TestDeploy_CreateOOMCondition() { + ns := "test-ns" + s.auditConfig = utils.DefaultAuditConfig(ns, false, false, false, true) + + image, err := s.containerImageResolver.CnspecImage( + s.auditConfig.Spec.Scanner.Image.Name, s.auditConfig.Spec.Scanner.Image.Tag, false) + s.NoError(err) + + labels := DeploymentLabels(s.auditConfig) + deployment := ScanApiDeployment(s.auditConfig.Namespace, image, s.auditConfig, mondoov1alpha2.MondooOperatorConfig{}, "", false) + deployment.Status.UnavailableReplicas = 1 + deployment.Status.Conditions = []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentConditionType(mondoov1alpha2.ScanAPIDegraded), + Status: "ScanAPI degraded", + Message: "", // This message is not important for the test. The Container Status is evaluated for OOM.
+ }, + } + + oomPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "scan-api-123", + Namespace: ns, + Labels: labels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "scan-api", + Image: image, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceMemory: *resource.NewQuantity(1, resource.BinarySI), + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "scan-api", + LastTerminationState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 137, + }, + }, + }, + }, + }, + } + + s.fakeClientBuilder = s.fakeClientBuilder.WithObjects(&s.auditConfig, deployment, oomPod) + + d := s.createDeploymentHandler() + result, err := d.Reconcile(s.ctx) + s.NoError(err) + s.True(result.IsZero()) + + ds := &appsv1.DeploymentList{} + s.NoError(d.KubeClient.List(s.ctx, ds)) + s.Equal(1, len(ds.Items)) + + pods := &corev1.PodList{} + s.NoError(d.KubeClient.List(s.ctx, pods)) + s.Equal(1, len(pods.Items)) + + // only the ScanAPI condition is set in this test, so it sits at index 0 + condition := s.auditConfig.Status.Conditions[0] + s.Assertions.NotEmpty(condition) + s.Contains(condition.Message, " OOM") + s.Contains(condition.AffectedPods, "scan-api-123") + s.Contains(condition.MemoryLimit, "1") +} + func (s *DeploymentHandlerSuite) TestReconcile_Update() { image, err := s.containerImageResolver.CnspecImage( s.auditConfig.Spec.Scanner.Image.Name, s.auditConfig.Spec.Scanner.Image.Tag, false) diff --git a/controllers/status/operator_status.go b/controllers/status/operator_status.go index 02b605baf..8bd37b6c1 100644 --- a/controllers/status/operator_status.go +++ b/controllers/status/operator_status.go @@ -4,10 +4,14 @@ package status import ( + "strings" + + "github.com/go-logr/logr" "go.mondoo.com/mondoo-operator/api/v1alpha2" "go.mondoo.com/mondoo-operator/pkg/client/mondooclient" "go.mondoo.com/mondoo-operator/pkg/utils/mondoo" "go.mondoo.com/mondoo-operator/pkg/version" + "google.golang.org/protobuf/types/known/structpb" v1 "k8s.io/api/core/v1" k8sversion "k8s.io/apimachinery/pkg/version" ) @@ -40,7 +44,7 @@ type MondooAuditConfig struct { } func ReportStatusRequestFromAuditConfig( - integrationMrn string, m v1alpha2.MondooAuditConfig, nodes []v1.Node, k8sVersion *k8sversion.Info, + integrationMrn string, m v1alpha2.MondooAuditConfig, nodes []v1.Node, k8sVersion *k8sversion.Info, log logr.Logger, ) mondooclient.ReportStatusRequest { nodeNames := make([]string, len(nodes)) for i := range nodes { @@ -56,6 +60,13 @@ func ReportStatusRequestFromAuditConfig( if k8sResourcesScanning != nil { if k8sResourcesScanning.Status == v1.ConditionTrue { messages[0].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(k8sResourcesScanning.Message, k8sResourcesScanning.AffectedPods, k8sResourcesScanning.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Kubernetes Resource Scanning on OOM error") + } + if extraStruct != nil { + messages[0].Extra = extraStruct + } } else { messages[0].Status = mondooclient.MessageStatus_MESSAGE_INFO } @@ -72,14 +83,21 @@ func ReportStatusRequestFromAuditConfig( // Container image scanning status messages[1].Identifier = ContainerImageScanningIdentifier if m.Spec.KubernetesResources.ContainerImageScanning || m.Spec.Containers.Enable { - k8sResourcesScanning := mondoo.FindMondooAuditConditions(m.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded) - if k8sResourcesScanning != nil { -
if k8sResourcesScanning.Status == v1.ConditionTrue { + containerImageScanning := mondoo.FindMondooAuditConditions(m.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded) + if containerImageScanning != nil { + if containerImageScanning.Status == v1.ConditionTrue { messages[1].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(containerImageScanning.Message, containerImageScanning.AffectedPods, containerImageScanning.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Kubernetes Container Image Scanning on OOM error") + } + if extraStruct != nil { + messages[1].Extra = extraStruct + } } else { messages[1].Status = mondooclient.MessageStatus_MESSAGE_INFO } - messages[1].Message = k8sResourcesScanning.Message + messages[1].Message = containerImageScanning.Message } else { messages[1].Status = mondooclient.MessageStatus_MESSAGE_UNKNOWN messages[1].Message = noStatusMessage @@ -96,6 +114,13 @@ func ReportStatusRequestFromAuditConfig( if nodeScanning != nil { if nodeScanning.Status == v1.ConditionTrue { messages[2].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(nodeScanning.Message, nodeScanning.AffectedPods, nodeScanning.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Node Scanning on OOM error") + } + if extraStruct != nil { + messages[2].Extra = extraStruct + } } else { messages[2].Status = mondooclient.MessageStatus_MESSAGE_INFO } @@ -116,6 +141,13 @@ func ReportStatusRequestFromAuditConfig( if admissionControllerScanning != nil { if admissionControllerScanning.Status == v1.ConditionTrue { messages[3].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(admissionControllerScanning.Message, admissionControllerScanning.AffectedPods, admissionControllerScanning.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Admission Controller on OOM error") + } + if extraStruct != nil { + messages[3].Extra = extraStruct + } } else { messages[3].Status = mondooclient.MessageStatus_MESSAGE_INFO } @@ -135,6 +167,13 @@ func ReportStatusRequestFromAuditConfig( if scanApi != nil { if scanApi.Status == v1.ConditionTrue { messages[4].Status = mondooclient.MessageStatus_MESSAGE_ERROR + extraStruct, err := createOOMExtraInformation(scanApi.Message, scanApi.AffectedPods, scanApi.MemoryLimit) + if err != nil { + log.Error(err, "Failed to create extra information for Scan API on OOM error") + } + if extraStruct != nil { + messages[4].Extra = extraStruct + } } else { messages[4].Status = mondooclient.MessageStatus_MESSAGE_INFO } @@ -174,3 +213,19 @@ func ReportStatusRequestFromAuditConfig( Messages: mondooclient.Messages{Messages: messages}, } } + +func createOOMExtraInformation(message string, affectedPods []string, memoryLimit string) (*structpb.Struct, error) { + var pbStruct *structpb.Struct + var err error + if strings.HasSuffix(message, " OOM") { + pbStruct, err = structpb.NewStruct(map[string]interface{}{ + "errorCode": "OOMKilled", + "affectedPods": strings.Join(affectedPods, ", "), + "memoryLimit": memoryLimit, + }) + if err != nil { + return nil, err + } + } + return pbStruct, nil +} diff --git a/controllers/status/operator_status_test.go b/controllers/status/operator_status_test.go index d1d82d726..e6889bc4a 100644 --- a/controllers/status/operator_status_test.go +++ b/controllers/status/operator_status_test.go @@ -6,7 +6,9 @@ package status import (
"testing" + "github.com/go-logr/logr" "github.com/stretchr/testify/assert" + "google.golang.org/protobuf/types/known/structpb" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sversion "k8s.io/apimachinery/pkg/version" @@ -18,6 +20,7 @@ import ( ) func TestReportStatusRequestFromAuditConfig_AllDisabled(t *testing.T) { + logger := logr.Logger{} integrationMrn := utils.RandString(10) nodes := []v1.Node{ {ObjectMeta: metav1.ObjectMeta{Name: "node1"}}, @@ -26,7 +29,7 @@ func TestReportStatusRequestFromAuditConfig_AllDisabled(t *testing.T) { v := &k8sversion.Info{GitVersion: "v1.24.0"} m := testMondooAuditConfig() - reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v) + reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) assert.Equal(t, integrationMrn, reportStatus.Mrn) assert.Equal(t, mondooclient.Status_ACTIVE, reportStatus.Status) assert.Equal(t, OperatorCustomState{ @@ -47,6 +50,7 @@ func TestReportStatusRequestFromAuditConfig_AllDisabled(t *testing.T) { } func TestReportStatusRequestFromAuditConfig_AllEnabled(t *testing.T) { + logger := logr.Logger{} integrationMrn := utils.RandString(10) nodes := []v1.Node{ {ObjectMeta: metav1.ObjectMeta{Name: "node1"}}, @@ -65,14 +69,14 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled(t *testing.T) { } m.Status.Conditions = []v1alpha2.MondooAuditConfigCondition{ - {Message: "Kubernetes Resources Scanning is Available", Status: v1.ConditionFalse, Type: v1alpha2.K8sResourcesScanningDegraded}, - {Message: "Kubernetes Container Image Scanning is Available", Status: v1.ConditionFalse, Type: v1alpha2.K8sContainerImageScanningDegraded}, + {Message: "Kubernetes Resources Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.K8sResourcesScanningDegraded}, + {Message: "Kubernetes Container Image Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.K8sContainerImageScanningDegraded}, {Message: "Node Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.NodeScanningDegraded}, {Message: "Admission controller is available", Status: v1.ConditionFalse, Type: v1alpha2.AdmissionDegraded}, {Message: "ScanAPI controller is available", Status: v1.ConditionFalse, Type: v1alpha2.ScanAPIDegraded}, } - reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v) + reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) assert.Equal(t, integrationMrn, reportStatus.Mrn) assert.Equal(t, mondooclient.Status_ACTIVE, reportStatus.Status) assert.Equal(t, OperatorCustomState{ @@ -102,6 +106,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled(t *testing.T) { } func TestReportStatusRequestFromAuditConfig_AllEnabled_DeprecatedFields(t *testing.T) { + logger := logr.Logger{} integrationMrn := utils.RandString(10) nodes := []v1.Node{ {ObjectMeta: metav1.ObjectMeta{Name: "node1"}}, @@ -120,14 +125,14 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled_DeprecatedFields(t *testi } m.Status.Conditions = []v1alpha2.MondooAuditConfigCondition{ - {Message: "Kubernetes Resources Scanning is Available", Status: v1.ConditionFalse, Type: v1alpha2.K8sResourcesScanningDegraded}, - {Message: "Kubernetes Container Image Scanning is Available", Status: v1.ConditionFalse, Type: v1alpha2.K8sContainerImageScanningDegraded}, + {Message: "Kubernetes Resources Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.K8sResourcesScanningDegraded}, + {Message: "Kubernetes Container Image Scanning is available", 
Status: v1.ConditionFalse, Type: v1alpha2.K8sContainerImageScanningDegraded}, {Message: "Node Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.NodeScanningDegraded}, {Message: "Admission controller is available", Status: v1.ConditionFalse, Type: v1alpha2.AdmissionDegraded}, {Message: "ScanAPI controller is available", Status: v1.ConditionFalse, Type: v1alpha2.ScanAPIDegraded}, } - reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v) + reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) assert.Equal(t, integrationMrn, reportStatus.Mrn) assert.Equal(t, mondooclient.Status_ACTIVE, reportStatus.Status) assert.Equal(t, OperatorCustomState{ @@ -157,6 +162,7 @@ func TestReportStatusRequestFromAuditConfig_AllEnabled_DeprecatedFields(t *testi } func TestReportStatusRequestFromAuditConfig_AllError(t *testing.T) { + logger := logr.Logger{} integrationMrn := utils.RandString(10) nodes := []v1.Node{ {ObjectMeta: metav1.ObjectMeta{Name: "node1"}}, @@ -178,7 +184,7 @@ func TestReportStatusRequestFromAuditConfig_AllError(t *testing.T) { {Message: "ScanAPI controller error", Status: v1.ConditionTrue, Type: v1alpha2.ScanAPIDegraded}, } - reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v) + reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) assert.Equal(t, integrationMrn, reportStatus.Mrn) assert.Equal(t, mondooclient.Status_ERROR, reportStatus.Status) assert.Equal(t, OperatorCustomState{ @@ -210,3 +216,130 @@ func testMondooAuditConfig() v1alpha2.MondooAuditConfig { }, } } + +func TestReportStatusRequestFromAuditConfig_AllEnabled_ScanAPI_OOM(t *testing.T) { + logger := logr.Logger{} + integrationMrn := utils.RandString(10) + nodes := []v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "node1"}}, + {ObjectMeta: metav1.ObjectMeta{Name: "node2"}}, + } + v := &k8sversion.Info{GitVersion: "v1.24.0"} + + m := testMondooAuditConfig() + m.Spec.KubernetesResources.Enable = true + m.Spec.Containers.Enable = true + m.Spec.Nodes.Enable = true + m.Spec.Admission.Enable = true + + m.Status.Conditions = []v1alpha2.MondooAuditConfigCondition{ + {Message: "Kubernetes Resources Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.K8sResourcesScanningDegraded}, + {Message: "Kubernetes Container Image Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.K8sContainerImageScanningDegraded}, + {Message: "Node Scanning is available", Status: v1.ConditionFalse, Type: v1alpha2.NodeScanningDegraded}, + {Message: "Admission controller is available", Status: v1.ConditionFalse, Type: v1alpha2.AdmissionDegraded}, + {Message: "ScanAPI controller is degraded due to OOM", Status: v1.ConditionTrue, Type: v1alpha2.ScanAPIDegraded, AffectedPods: []string{"scanapi-1", "scanapi-2"}, MemoryLimit: "300Mi"}, + } + + reportStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes, v, logger) + assert.Equal(t, integrationMrn, reportStatus.Mrn) + assert.Equal(t, mondooclient.Status_ERROR, reportStatus.Status) + extraData := reportStatus.Messages.Messages[4].Extra.(*structpb.Struct) + extraMap := extraData.AsMap() + assert.Contains(t, extraMap, "errorCode") + assert.Contains(t, extraMap, "affectedPods") + assert.Contains(t, extraMap, "memoryLimit") + assert.Contains(t, extraMap["errorCode"], "OOMKilled") + assert.Contains(t, extraMap["affectedPods"], "scanapi-1") + assert.Contains(t, extraMap["memoryLimit"], "300Mi") +} + +func TestCreateOOMExtraInformation(t *testing.T) { 
+ // Test cases + tests := []struct { + name string + message string + affectedPods []string + memoryLimit string + expected *structpb.Struct + expectedErr error + }{ + { + name: "Message ends with OOM", + message: "Container was terminated due to OOM", + affectedPods: []string{"pod1", "pod2"}, + memoryLimit: "1Gi", + expected: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "errorCode": { + Kind: &structpb.Value_StringValue{ + StringValue: "OOMKilled", + }, + }, + "affectedPods": { + Kind: &structpb.Value_StringValue{ + StringValue: "pod1, pod2", + }, + }, + "memoryLimit": { + Kind: &structpb.Value_StringValue{ + StringValue: "1Gi", + }, + }, + }, + }, + expectedErr: nil, + }, + { + name: "Message does not end with OOM", + message: "Container was terminated due to an error", + affectedPods: []string{"pod1", "pod2"}, + memoryLimit: "1Gi", + expected: nil, + expectedErr: nil, + }, + } + + // Run tests + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual, err := createOOMExtraInformation(tt.message, tt.affectedPods, tt.memoryLimit) + assert.Equal(t, tt.expectedErr, err) + assert.Equal(t, tt.expected, actual) + }) + } +} + +func TestCreateOOMExtraInformationPBMap(t *testing.T) { + // Test cases + tests := []struct { + name string + message string + affectedPods []string + memoryLimit string + expected map[string]interface{} + expectedErr error + }{ + { + name: "Message ends with OOM", + message: "Container was terminated due to OOM", + affectedPods: []string{"pod1", "pod2"}, + memoryLimit: "1Gi", + expected: map[string]interface{}{ + "errorCode": "OOMKilled", + "affectedPods": "pod1, pod2", + "memoryLimit": "1Gi", + }, + expectedErr: nil, + }, + } + + // Run tests + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual, err := createOOMExtraInformation(tt.message, tt.affectedPods, tt.memoryLimit) + + assert.Equal(t, tt.expectedErr, err) + assert.Equal(t, tt.expected, actual.AsMap()) + }) + } +} diff --git a/controllers/status/status_reporter.go b/controllers/status/status_reporter.go index b75b730af..f64de271e 100644 --- a/controllers/status/status_reporter.go +++ b/controllers/status/status_reporter.go @@ -54,7 +54,7 @@ func (r *StatusReporter) Report(ctx context.Context, m v1alpha2.MondooAuditConfi return err } - operatorStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes.Items, r.k8sVersion) + operatorStatus := ReportStatusRequestFromAuditConfig(integrationMrn, m, nodes.Items, r.k8sVersion, logger) if reflect.DeepEqual(operatorStatus, r.lastReportedStatus) { return nil // If the status hasn't change, don't report } diff --git a/pkg/utils/mondoo/condition.go b/pkg/utils/mondoo/condition.go index 08cbd03bc..29e72581f 100644 --- a/pkg/utils/mondoo/condition.go +++ b/pkg/utils/mondoo/condition.go @@ -151,6 +151,8 @@ func SetMondooAuditCondition( reason string, message string, updateConditionCheck UpdateConditionCheck, + affectedPods []string, + memoryLimit string, ) []mondoov1alpha2.MondooAuditConfigCondition { now := metav1.Now() existingCondition := FindMondooAuditConditions(conditions, conditionType) @@ -164,6 +166,8 @@ func SetMondooAuditCondition( Message: message, LastTransitionTime: now, LastUpdateTime: now, + AffectedPods: affectedPods, + MemoryLimit: memoryLimit, }, ) } else { @@ -179,6 +183,8 @@ func SetMondooAuditCondition( existingCondition.Reason = reason existingCondition.Message = message existingCondition.LastUpdateTime = now + existingCondition.AffectedPods = affectedPods + existingCondition.MemoryLimit = 
memoryLimit } } return conditions
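On the reporting side, `createOOMExtraInformation` above only attaches extra data when the condition message ends in " OOM", and it flattens the pod list into a single comma-separated string so every value in the `structpb.Struct` stays a scalar. A small sketch of the equivalent payload and how a consumer can unpack it, assuming only the `structpb` package (pod names and memory limit are made up for the example):

```go
package main

import (
	"fmt"

	"google.golang.org/protobuf/types/known/structpb"
)

func main() {
	// The same shape createOOMExtraInformation builds for an OOM-killed
	// scan Pod; the concrete values here are illustrative only.
	extra, err := structpb.NewStruct(map[string]interface{}{
		"errorCode":    "OOMKilled",
		"affectedPods": "scanapi-1, scanapi-2",
		"memoryLimit":  "300Mi",
	})
	if err != nil {
		panic(err)
	}

	// Consumers can turn the Struct back into a plain map,
	// as TestCreateOOMExtraInformationPBMap does via AsMap().
	for k, v := range extra.AsMap() {
		fmt.Printf("%s=%v\n", k, v)
	}
}
```

Joining `affectedPods` into one string is a design choice that spares consumers from handling nested list values; `AsMap()` returns exactly the flat map the operator put in.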