Skip to content

Commit

Permalink
feat: check pending job's Pod (#257)
Browse files Browse the repository at this point in the history
* check pending job's Pod

* fix typo
  • Loading branch information
zreigz authored Aug 27, 2024
1 parent 3ca1035 commit c2203f5
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions internal/controller/stackrunjob_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ import (
)

// Selector and timeout settings for stack run jobs.
const (
	// jobSelector is presumably the label key/selector identifying stack run
	// jobs — its use is not visible here; confirm against the callers.
	jobSelector = "stackrun.deployments.plural.sh"

	// jobTimout is the older, misspelled name for the job timeout. It holds
	// the same value as jobTimeout.
	jobTimout = 40 * time.Minute

	// jobTimeout is how long after its start time an active job may run
	// before it is considered timed out.
	jobTimeout = 40 * time.Minute

	// podTimeout is the grace period, measured from the job start time, after
	// which a failed or unresolvable job Pod marks the job as failed.
	podTimeout = 2 * time.Minute
)

// StackRunJobReconciler reconciles a Job resource.
type StackRunJobReconciler struct {
Expand Down Expand Up @@ -71,7 +72,7 @@ func (r *StackRunJobReconciler) Reconcile(ctx context.Context, req ctrl.Request)
// Exit if stack run is not in running state (run status already updated),
// or if the job is still running (harness controls run status).
if stackRun.Status != console.StackStatusRunning || job.Status.CompletionTime.IsZero() {
if isActiveJobTimout(stackRun.Status, job) {
if isActiveJobTimout(stackRun.Status, job) || r.isActiveJobPodFailed(ctx, stackRun.Status, job) {
if err := r.killJob(ctx, job); err != nil {
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -178,9 +179,24 @@ func getStackRunID(job *batchv1.Job) string {
return strings.TrimPrefix(job.Name, "stack-")
}

// isActiveJob reports whether the job has a start time but no completion time
// while its stack run is still in the pending state.
func isActiveJob(stackStatus console.StackStatus, job *batchv1.Job) bool {
	if stackStatus != console.StackStatusPending {
		return false
	}

	started := !job.Status.StartTime.IsZero()
	completed := !job.Status.CompletionTime.IsZero()
	return started && !completed
}

func isActiveJobTimout(stackStatus console.StackStatus, job *batchv1.Job) bool {
if stackStatus == console.StackStatusPending && job.Status.CompletionTime.IsZero() && !job.Status.StartTime.IsZero() {
return time.Now().After(job.Status.StartTime.Add(jobTimout))
if isActiveJob(stackStatus, job) {
return time.Now().After(job.Status.StartTime.Add(jobTimeout))
}
return false
}

// isActiveJobPodFailed reports whether an active job (see isActiveJob) should
// be treated as failed because of the state of its Pod.
//
// The Pod status is looked up through the job's selector labels. When the
// lookup returns an error or reports console.StackStatusFailed, the job is
// considered failed only once podTimeout has elapsed since
// job.Status.StartTime. It returns false for jobs that are not active, for
// jobs without a selector, and while the Pod status is anything else.
func (r *StackRunJobReconciler) isActiveJobPodFailed(ctx context.Context, stackStatus console.StackStatus, job *batchv1.Job) bool {
	if !isActiveJob(stackStatus, job) {
		return false
	}

	// Selector is a pointer; without it there are no labels to find the Pod
	// by, so do not dereference it and do not report a failure.
	if job.Spec.Selector == nil {
		return false
	}

	status, err := r.getJobPodStatus(ctx, job.Spec.Selector.MatchLabels)
	if err == nil && status != console.StackStatusFailed {
		return false
	}

	// A lookup error is deliberately handled like a failed Pod: it also covers
	// the case where the job's Pod has not been created yet, so the job gets
	// podTimeout of grace from its start time before being reported as failed.
	return time.Now().After(job.Status.StartTime.Add(podTimeout))
}
Expand Down

0 comments on commit c2203f5

Please sign in to comment.