Skip to content

Commit

Permalink
✨ Report on OOMkilled status (#928)
Browse files Browse the repository at this point in the history
* ✨ Report on OOMkilled status

Signed-off-by: Christian Zunker <[email protected]>
  • Loading branch information
czunker authored Nov 16, 2023
1 parent bf8c252 commit 354bf9e
Show file tree
Hide file tree
Showing 24 changed files with 772 additions and 51 deletions.
4 changes: 4 additions & 0 deletions api/v1alpha2/mondooauditconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ type MondooAuditConfigCondition struct {
Reason string `json:"reason,omitempty"`
// Message is a human-readable message indicating details about the last transition
Message string `json:"message,omitempty"`
// AffectedPods, when filled, contains a list of pods which are affected by an issue
AffectedPods []string `json:"affectedPods,omitempty"`
// MemoryLimit contains the currently active memory limit for a Pod
MemoryLimit string `json:"memoryLimit,omitempty"`
}

// MondooOperatorConfigConditionType is a valid value for MondooOperatorConfig.Status.Condition[].Type
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions config/crd/bases/k8s.mondoo.com_mondooauditconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,12 @@ spec:
description: Conditions includes detailed status for the MondooAuditConfig
items:
properties:
affectedPods:
description: AffectedPods, when filled, contains a list which
are affected by an issue
items:
type: string
type: array
lastTransitionTime:
description: LastTransitionTime is the last time the condition
transitioned from one status to another.
Expand All @@ -487,6 +493,10 @@ spec:
description: LastUpdateTime is the last time we probed the condition
format: date-time
type: string
memoryLimit:
description: MemoryLimit contains the currently active memory
limit for a Pod
type: string
message:
description: Message is a human-readable message indicating
details about the last transition
Expand Down
17 changes: 15 additions & 2 deletions controllers/admission/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,30 @@ import (
corev1 "k8s.io/api/core/v1"
)

func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool) {
func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) {
msg := "Admission controller is available"
reason := "AdmissionAvailable"
status := corev1.ConditionFalse
updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange
affectedPods := []string{}
memoryLimit := ""
if !config.Spec.Admission.Enable {
msg = "Admission controller is disabled"
reason = "AdmissionDisabled"
status = corev1.ConditionFalse
} else if degradedStatus {
msg = "Admission controller is unavailable"
for _, pod := range pods.Items {
for _, status := range pod.Status.ContainerStatuses {
if status.LastTerminationState.Terminated != nil && status.LastTerminationState.Terminated.ExitCode == 137 {
// TODO: double check container name?
msg = "Admission controller is unavailable due to OOM"
affectedPods = append(affectedPods, pod.Name)
memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String()
break
}
}
}
reason = "AdmissionUnvailable"
status = corev1.ConditionTrue
condition := mondoo.FindMondooAuditConditions(config.Status.Conditions, mondoov1alpha2.ScanAPIDegraded)
Expand All @@ -28,5 +41,5 @@ func updateAdmissionConditions(config *mondoov1alpha2.MondooAuditConfig, degrade
}
}

config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.AdmissionDegraded, status, reason, msg, updateCheck)
config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, mondoov1alpha2.AdmissionDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit)
}
27 changes: 20 additions & 7 deletions controllers/admission/deployment_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ func (n *DeploymentHandler) syncWebhookDeployment(ctx context.Context) error {
webhookLog.V(3).Info("Webhook deployment is only scaled to 1 replica, but the webhook mode is set to 'enforcing'. This might be problematic if the API server is not able to connect to the webhook. Please consider increasing the replicas.")
}

deployment := &appsv1.Deployment{}
created, err := k8s.CreateIfNotExist(ctx, n.KubeClient, deployment, desiredDeployment)
existingDeployment := &appsv1.Deployment{}
created, err := k8s.CreateIfNotExist(ctx, n.KubeClient, existingDeployment, desiredDeployment)
if err != nil {
webhookLog.Error(err, "failed to create Deployment for webhook")
return err
Expand All @@ -250,18 +250,31 @@ func (n *DeploymentHandler) syncWebhookDeployment(ctx context.Context) error {
return nil
}

updateAdmissionConditions(n.Mondoo, n.isWebhookDegraded(deployment))
// Get Pods for this deployment
selector, _ := metav1.LabelSelectorAsSelector(existingDeployment.Spec.Selector)
opts := []client.ListOption{
client.InNamespace(existingDeployment.Namespace),
client.MatchingLabelsSelector{Selector: selector},
}
pods := &corev1.PodList{}
err = n.KubeClient.List(ctx, pods, opts...)
if err != nil {
webhookLog.Error(err, "Failed to list Pods for Admission controller")
return err
}

updateAdmissionConditions(n.Mondoo, n.isWebhookDegraded(existingDeployment), pods)

// Not a full check for whether someone has modified our Deployment, but checking for some important bits so we know
// if an Update() is needed.
if !k8s.AreDeploymentsEqual(*deployment, *desiredDeployment) {
if !k8s.AreDeploymentsEqual(*existingDeployment, *desiredDeployment) {
// Note: changes to the labels/selector labels means we can't Update() the
// Deployment, so we'll do a delete/create instead.
if err := k8s.DeleteIfExists(ctx, n.KubeClient, deployment); err != nil {
if err := k8s.DeleteIfExists(ctx, n.KubeClient, existingDeployment); err != nil {
webhookLog.Error(err, "failed to delete exising webhook Deployment")
return err
}
if _, err := k8s.CreateIfNotExist(ctx, n.KubeClient, deployment, desiredDeployment); err != nil {
if _, err := k8s.CreateIfNotExist(ctx, n.KubeClient, existingDeployment, desiredDeployment); err != nil {
webhookLog.Error(err, "failed to replace exising webhook Deployment")
return err
}
Expand Down Expand Up @@ -526,7 +539,7 @@ func (n *DeploymentHandler) down(ctx context.Context) (ctrl.Result, error) {
}

// Make sure to clear any degraded status
updateAdmissionConditions(n.Mondoo, false)
updateAdmissionConditions(n.Mondoo, false, &corev1.PodList{})

return ctrl.Result{}, nil
}
Expand Down
23 changes: 19 additions & 4 deletions controllers/container_image/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,36 @@ import (
corev1 "k8s.io/api/core/v1"
)

func updateImageScanningConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool) {
msg := "Kubernetes Container Image Scanning is Available"
func updateImageScanningConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) {
msg := "Kubernetes Container Image Scanning is available"
reason := "KubernetesContainerImageScanningAvailable"
status := corev1.ConditionFalse
updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange
affectedPods := []string{}
memoryLimit := ""
if !config.Spec.KubernetesResources.ContainerImageScanning && !config.Spec.Containers.Enable {
msg = "Kubernetes Container Image Scanning is disabled"
reason = "KubernetesContainerImageScanningDisabled"
status = corev1.ConditionFalse
} else if degradedStatus {
msg = "Kubernetes Container Image Scanning is Unavailable"
msg = "Kubernetes Container Image Scanning is unavailable"
reason = "KubernetesContainerImageScanningUnavailable"
status = corev1.ConditionTrue
}

for _, pod := range pods.Items {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 {
// TODO: double check container name?
msg = "Kubernetes Container Image Scanning is unavailable due to OOM"
affectedPods = append(affectedPods, pod.Name)
memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String()
reason = "KubernetesContainerImageScanningUnavailable"
status = corev1.ConditionTrue
}
}
}

config.Status.Conditions = mondoo.SetMondooAuditCondition(
config.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded, status, reason, msg, updateCheck)
config.Status.Conditions, v1alpha2.K8sContainerImageScanningDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit)
}
20 changes: 18 additions & 2 deletions controllers/container_image/deployment_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,23 @@ func (n *DeploymentHandler) syncCronJob(ctx context.Context) error {
return err
}

updateImageScanningConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs))
// Get Pods for this CronJob
pods := &corev1.PodList{}
if len(cronJobs) > 0 {
lSelector := metav1.SetAsLabelSelector(CronJobLabels(*n.Mondoo))
selector, _ := metav1.LabelSelectorAsSelector(lSelector)
opts := []client.ListOption{
client.InNamespace(n.Mondoo.Namespace),
client.MatchingLabelsSelector{Selector: selector},
}
err = n.KubeClient.List(ctx, pods, opts...)
if err != nil {
logger.Error(err, "Failed to list Pods for Kubernetes Container Image Scanning")
return err
}
}

updateImageScanningConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs), pods)
return nil
}

Expand Down Expand Up @@ -214,7 +230,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error {
}

// Clear any remnant status
updateImageScanningConditions(n.Mondoo, false)
updateImageScanningConditions(n.Mondoo, false, &corev1.PodList{})

return nil
}
6 changes: 3 additions & 3 deletions controllers/container_image/deployment_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus()
// Verify the image scanning status is set to available
s.Equal(1, len(d.Mondoo.Status.Conditions))
condition := d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Container Image Scanning is Available", condition.Message)
s.Equal("Kubernetes Container Image Scanning is available", condition.Message)
s.Equal("KubernetesContainerImageScanningAvailable", condition.Reason)
s.Equal(corev1.ConditionFalse, condition.Status)

Expand All @@ -300,7 +300,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus()

// Verify the image scanning status is set to unavailable
condition = d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Container Image Scanning is Unavailable", condition.Message)
s.Equal("Kubernetes Container Image Scanning is unavailable", condition.Message)
s.Equal("KubernetesContainerImageScanningUnavailable", condition.Reason)
s.Equal(corev1.ConditionTrue, condition.Status)

Expand All @@ -316,7 +316,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sContainerImageScanningStatus()

// Verify the image scanning status is set to available
condition = d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Container Image Scanning is Available", condition.Message)
s.Equal("Kubernetes Container Image Scanning is available", condition.Message)
s.Equal("KubernetesContainerImageScanningAvailable", condition.Reason)
s.Equal(corev1.ConditionFalse, condition.Status)

Expand Down
2 changes: 1 addition & 1 deletion controllers/integration/integration_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,5 +167,5 @@ func updateIntegrationCondition(config *v1alpha2.MondooAuditConfig, degradedStat
msg = customMessage
}

config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, v1alpha2.MondooIntegrationDegraded, status, reason, msg, updateCheck)
config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, v1alpha2.MondooIntegrationDegraded, status, reason, msg, updateCheck, []string{}, "")
}
23 changes: 19 additions & 4 deletions controllers/k8s_scan/conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,36 @@ import (
corev1 "k8s.io/api/core/v1"
)

func updateWorkloadsConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool) {
msg := "Kubernetes Resources Scanning is Available"
func updateWorkloadsConditions(config *v1alpha2.MondooAuditConfig, degradedStatus bool, pods *corev1.PodList) {
msg := "Kubernetes Resources Scanning is available"
reason := "KubernetesResourcesScanningAvailable"
status := corev1.ConditionFalse
updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange
affectedPods := []string{}
memoryLimit := ""
if !config.Spec.KubernetesResources.Enable {
msg = "Kubernetes Resources Scanning is disabled"
reason = "KubernetesResourcesScanningDisabled"
status = corev1.ConditionFalse
} else if degradedStatus {
msg = "Kubernetes Resources Scanning is Unavailable"
msg = "Kubernetes Resources Scanning is unavailable"
reason = "KubernetesResourcesScanningUnavailable"
status = corev1.ConditionTrue
}

for _, pod := range pods.Items {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137 {
// TODO: double check container name?
msg = "Kubernetes Resources Scanning is unavailable due to OOM"
affectedPods = append(affectedPods, pod.Name)
memoryLimit = pod.Spec.Containers[0].Resources.Limits.Memory().String()
reason = "KubernetesResourcesScanningUnavailable"
status = corev1.ConditionTrue
}
}
}

config.Status.Conditions = mondoo.SetMondooAuditCondition(
config.Status.Conditions, v1alpha2.K8sResourcesScanningDegraded, status, reason, msg, updateCheck)
config.Status.Conditions, v1alpha2.K8sResourcesScanningDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit)
}
18 changes: 16 additions & 2 deletions controllers/k8s_scan/deployment_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,21 @@ func (n *DeploymentHandler) syncCronJob(ctx context.Context) error {
return err
}

updateWorkloadsConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs))
// Get Pods for this CronJob
pods := &corev1.PodList{}
if len(cronJobs) > 0 {
opts := &client.ListOptions{
Namespace: n.Mondoo.Namespace,
LabelSelector: labels.SelectorFromSet(CronJobLabels(*n.Mondoo)),
}
err = n.KubeClient.List(ctx, pods, opts)
if err != nil {
logger.Error(err, "Failed to list Pods for scan Kubernetes Reosurce Scanning")
return err
}
}

updateWorkloadsConditions(n.Mondoo, !k8s.AreCronJobsSuccessful(cronJobs), pods)
return n.cleanupWorkloadDeployment(ctx)
}

Expand Down Expand Up @@ -137,7 +151,7 @@ func (n *DeploymentHandler) down(ctx context.Context) error {
}

// Clear any remnant status
updateWorkloadsConditions(n.Mondoo, false)
updateWorkloadsConditions(n.Mondoo, false, &corev1.PodList{})

return nil
}
Expand Down
8 changes: 4 additions & 4 deletions controllers/k8s_scan/deployment_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() {
// Verify container image scanning and kubernetes resources conditions
s.Equal(1, len(d.Mondoo.Status.Conditions))
condition := d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Resources Scanning is Available", condition.Message)
s.Equal("Kubernetes Resources Scanning is available", condition.Message)
s.Equal("KubernetesResourcesScanningAvailable", condition.Reason)
s.Equal(corev1.ConditionFalse, condition.Status)

Expand All @@ -240,7 +240,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() {

// Verify the kubernetes resources status is set to unavailable
condition = d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Resources Scanning is Unavailable", condition.Message)
s.Equal("Kubernetes Resources Scanning is unavailable", condition.Message)
s.Equal("KubernetesResourcesScanningUnavailable", condition.Reason)
s.Equal(corev1.ConditionTrue, condition.Status)

Expand All @@ -256,7 +256,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() {

// Verify the kubernetes resources scanning status is set to available
condition = d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Resources Scanning is Available", condition.Message)
s.Equal("Kubernetes Resources Scanning is available", condition.Message)
s.Equal("KubernetesResourcesScanningAvailable", condition.Reason)
s.Equal(corev1.ConditionFalse, condition.Status)

Expand All @@ -274,7 +274,7 @@ func (s *DeploymentHandlerSuite) TestReconcile_K8sResourceScanningStatus() {

// Verify the kubernetes resources scanning status is set to available when there is an active scan
condition = d.Mondoo.Status.Conditions[0]
s.Equal("Kubernetes Resources Scanning is Available", condition.Message)
s.Equal("Kubernetes Resources Scanning is available", condition.Message)
s.Equal("KubernetesResourcesScanningAvailable", condition.Reason)
s.Equal(corev1.ConditionFalse, condition.Status)

Expand Down
Loading

0 comments on commit 354bf9e

Please sign in to comment.