From 79e0f9eb9427d82e3a1d6765c35882c2eae76a9d Mon Sep 17 00:00:00 2001
From: Fabricio Aguiar
Date: Wed, 30 Oct 2024 16:03:59 +0000
Subject: [PATCH] Fix ansible job error reason

This change fixes a nil pointer dereference by only treating the job
as failed once its failure count reaches the defined BackoffLimit. At
that point the JobFailed condition, and with it the Reason we report,
is populated in the Job status before we reference it.

closes OSPRH-11068

Signed-off-by: Fabricio Aguiar
---
 pkg/dataplane/deployment.go | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/pkg/dataplane/deployment.go b/pkg/dataplane/deployment.go
index 1483a9b9e..e2a456193 100644
--- a/pkg/dataplane/deployment.go
+++ b/pkg/dataplane/deployment.go
@@ -195,11 +195,10 @@ func (d *Deployer) ConditionalDeploy(
 		}
 	}
 
-	var ansibleCondition *batchv1.JobCondition
 	if nsConditions.IsFalse(readyCondition) {
-		var ansibleEE *batchv1.Job
+		var ansibleJob *batchv1.Job
 		_, labelSelector := dataplaneutil.GetAnsibleExecutionNameAndLabels(&foundService, d.Deployment.Name, d.NodeSet.Name)
-		ansibleEE, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
+		ansibleJob, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
 		if err != nil {
 			// Return nil if we don't have AnsibleEE available yet
 			if k8s_errors.IsNotFound(err) {
@@ -215,33 +214,34 @@ func (d *Deployer) ConditionalDeploy(
 					err.Error()))
 		}
 
-		if ansibleEE.Status.Succeeded > 0 {
+		if ansibleJob.Status.Succeeded > 0 {
 			log.Info(fmt.Sprintf("Condition %s ready", readyCondition))
 			nsConditions.Set(condition.TrueCondition(
 				readyCondition,
 				readyMessage))
-		} else if ansibleEE.Status.Active > 0 {
-			log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleEE.Name, ansibleEE.Status.Active))
+		} else if ansibleJob.Status.Active > 0 {
+			log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleJob.Name, ansibleJob.Status.Active))
 			nsConditions.Set(condition.FalseCondition(
 				readyCondition,
 				condition.RequestedReason,
 				condition.SeverityInfo,
 				readyWaitingMessage))
-		} else if ansibleEE.Status.Failed > 0 {
-			errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.Failed)
-			for _, condition := range ansibleEE.Status.Conditions {
+		} else if ansibleJob.Status.Failed >= *ansibleJob.Spec.BackoffLimit {
+			errorReason := condition.ErrorReason
+			errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleJob.Name, ansibleJob.Namespace, ansibleJob.Status.Failed)
+			for _, condition := range ansibleJob.Status.Conditions {
 				if condition.Type == batchv1.JobFailed {
-					ansibleCondition = &condition
+					if condition.Reason == batchv1.JobReasonBackoffLimitExceeded {
+						errorReason = condition.Reason
+						errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleJob.Name, ansibleJob.Namespace, condition.Message)
+					}
 				}
 			}
-			if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded {
-				errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleCondition.Message)
-			}
 			log.Info(fmt.Sprintf("Condition %s error", readyCondition))
 			err = fmt.Errorf(errorMsg)
 			nsConditions.Set(condition.FalseCondition(
 				readyCondition,
-				condition.Reason(ansibleCondition.Reason),
+				condition.Reason(errorReason),
 				condition.SeverityError,
 				readyErrorMessage,
 				err.Error()))
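
Note for reviewers (not part of the patch): the standalone Go sketch below
illustrates the guard the diff introduces. A Job is only treated as failed
once Status.Failed reaches Spec.BackoffLimit, and a generic reason is used
when the JobFailed condition is absent, instead of dereferencing a condition
pointer that may never have been set. The helper name failureReason and the
fallback strings are hypothetical; it assumes only the upstream
k8s.io/api/batch/v1 types (including JobReasonBackoffLimitExceeded, which the
patch itself uses).

// failurereason_sketch.go - illustrative only, not part of the patch.
package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
)

// failureReason mirrors the patched logic: default to a generic reason and
// only override it when the JobFailed condition is actually present.
func failureReason(job *batchv1.Job) (reason string, message string, failed bool) {
	// BackoffLimit is *int32; the API server normally defaults it, but guard anyway.
	var backoffLimit int32 = 6
	if job.Spec.BackoffLimit != nil {
		backoffLimit = *job.Spec.BackoffLimit
	}
	// Before the failure count reaches the backoff limit, the JobFailed
	// condition (and its Reason) may not exist yet, so report "not failed yet".
	if job.Status.Failed < backoffLimit {
		return "", "", false
	}

	reason = "Error" // generic fallback, analogous to condition.ErrorReason in the patch
	message = fmt.Sprintf("failed pods: %d", job.Status.Failed)
	for _, c := range job.Status.Conditions {
		if c.Type == batchv1.JobFailed && c.Reason == batchv1.JobReasonBackoffLimitExceeded {
			reason = c.Reason
			message = fmt.Sprintf("backoff limit reached: %s", c.Message)
		}
	}
	return reason, message, true
}

func main() {
	limit := int32(1)
	job := &batchv1.Job{}
	job.Spec.BackoffLimit = &limit
	job.Status.Failed = 1
	job.Status.Conditions = []batchv1.JobCondition{{
		Type:    batchv1.JobFailed,
		Reason:  batchv1.JobReasonBackoffLimitExceeded,
		Message: "Job has reached the specified backoff limit",
	}}

	if reason, msg, failed := failureReason(job); failed {
		fmt.Println(reason, msg)
	}
}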