Skip to content

Commit

Permalink
Fix ansible job error reason
Browse files Browse the repository at this point in the history
This change fixes a nil pointer dereference by treating the job as failed
only once its failed pod count reaches the defined BackoffLimit. This ensures that
the Reason of the Job's failed status condition is populated before we try to reference it.

closes OSPRH-11068

Signed-off-by: Fabricio Aguiar <[email protected]>
  • Loading branch information
fao89 authored and bshephar committed Oct 31, 2024
1 parent 1276a85 commit 79e0f9e
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions pkg/dataplane/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,10 @@ func (d *Deployer) ConditionalDeploy(

}

var ansibleCondition *batchv1.JobCondition
if nsConditions.IsFalse(readyCondition) {
var ansibleEE *batchv1.Job
var ansibleJob *batchv1.Job
_, labelSelector := dataplaneutil.GetAnsibleExecutionNameAndLabels(&foundService, d.Deployment.Name, d.NodeSet.Name)
ansibleEE, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
ansibleJob, err = dataplaneutil.GetAnsibleExecution(d.Ctx, d.Helper, d.Deployment, labelSelector)
if err != nil {
// Return nil if we don't have AnsibleEE available yet
if k8s_errors.IsNotFound(err) {
Expand All @@ -215,33 +214,34 @@ func (d *Deployer) ConditionalDeploy(
err.Error()))
}

if ansibleEE.Status.Succeeded > 0 {
if ansibleJob.Status.Succeeded > 0 {
log.Info(fmt.Sprintf("Condition %s ready", readyCondition))
nsConditions.Set(condition.TrueCondition(
readyCondition,
readyMessage))
} else if ansibleEE.Status.Active > 0 {
log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleEE.Name, ansibleEE.Status.Active))
} else if ansibleJob.Status.Active > 0 {
log.Info(fmt.Sprintf("AnsibleEE job is not yet completed: Execution: %s, Active pods: %d", ansibleJob.Name, ansibleJob.Status.Active))
nsConditions.Set(condition.FalseCondition(
readyCondition,
condition.RequestedReason,
condition.SeverityInfo,
readyWaitingMessage))
} else if ansibleEE.Status.Failed > 0 {
errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleEE.Name, ansibleEE.Namespace, ansibleEE.Status.Failed)
for _, condition := range ansibleEE.Status.Conditions {
} else if ansibleJob.Status.Failed >= *ansibleJob.Spec.BackoffLimit {
errorReason := condition.ErrorReason
errorMsg := fmt.Sprintf("execution.name %s execution.namespace %s failed pods: %d", ansibleJob.Name, ansibleJob.Namespace, ansibleJob.Status.Failed)
for _, condition := range ansibleJob.Status.Conditions {
if condition.Type == batchv1.JobFailed {
ansibleCondition = &condition
if condition.Reason == batchv1.JobReasonBackoffLimitExceeded {
errorReason = condition.Reason
errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleJob.Name, ansibleJob.Namespace, condition.Message)
}
}
}
if ansibleCondition.Reason == condition.JobReasonBackoffLimitExceeded {
errorMsg = fmt.Sprintf("backoff limit reached for execution.name %s execution.namespace %s execution.condition.message: %s", ansibleEE.Name, ansibleEE.Namespace, ansibleCondition.Message)
}
log.Info(fmt.Sprintf("Condition %s error", readyCondition))
err = fmt.Errorf(errorMsg)
nsConditions.Set(condition.FalseCondition(
readyCondition,
condition.Reason(ansibleCondition.Reason),
condition.Reason(errorReason),
condition.SeverityError,
readyErrorMessage,
err.Error()))
Expand Down

0 comments on commit 79e0f9e

Please sign in to comment.