Keep the populator pod on failure
This patch keeps the populator pod around when it fails; the pod is no
longer restarted or recreated. This aligns with how a v2v pod failure is
handled, and follows the reasoning that once we fail, we will most likely
keep failing. The change makes it easy to get the populator logs in case
of failure.

Signed-off-by: Liran Rotenberg <[email protected]>
liranr23 committed Oct 31, 2023
1 parent a55e08e commit 7828ad1
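
For context (not part of this commit), a minimal sketch of how the logs of a retained populator pod could then be read with client-go; the package, helper name, namespace, and pod name are illustrative assumptions:

// Package populatordebug is an illustrative sketch, not code from this commit.
package populatordebug

import (
	"context"
	"fmt"
	"io"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
)

// dumpPopulatorLogs streams the logs of a failed populator pod so they can be
// inspected after the failure. The namespace and pod name are supplied by the
// caller; the pod is assumed to still exist because it is no longer deleted.
func dumpPopulatorLogs(ctx context.Context, client kubernetes.Interface, namespace, podName string) error {
	req := client.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{})
	stream, err := req.Stream(ctx)
	if err != nil {
		return err
	}
	defer stream.Close()
	logs, err := io.ReadAll(stream)
	if err != nil {
		return err
	}
	fmt.Printf("%s\n", logs)
	return nil
}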
Showing 2 changed files with 16 additions and 10 deletions.
18 changes: 16 additions & 2 deletions pkg/controller/plan/migration.go
@@ -1453,7 +1453,7 @@ func (r *Migration) updateCopyProgress(vm *plan.VMStatus, step *plan.Step) (err
 	if r.Plan.Spec.Warm && len(importer.Status.ContainerStatuses) > 0 {
 		vm.Warm.Failures = int(importer.Status.ContainerStatuses[0].RestartCount)
 	}
-	if RestartLimitExceeded(importer) {
+	if restartLimitExceeded(importer) {
 		task.MarkedCompleted()
 		msg, _ := terminationMessage(importer)
 		task.AddError(msg)
@@ -1598,6 +1598,10 @@ func (r *Migration) updatePopulatorCopyProgress(vm *plan.VMStatus, step *plan.St
 	if err != nil {
 		return
 	}
+	populatorPods, err := r.kubevirt.getPopulatorPods()
+	if err != nil {
+		return
+	}
 
 	for _, pvc := range pvcs {
 		if _, ok := pvc.Annotations["lun"]; ok {
@@ -1616,6 +1620,16 @@ func (r *Migration) updatePopulatorCopyProgress(vm *plan.VMStatus, step *plan.St
 			continue
 		}
 
+		for _, pod := range populatorPods {
+			pvcId := strings.Split(pod.Name, "populate-")[1]
+			if string(pvc.UID) != pvcId {
+				continue
+			}
+			if pod.Status.Phase == core.PodFailed {
+				return fmt.Errorf("populator pod %s/%s failed for PVC %s. Please check the pod logs.", pod.Namespace, pod.Name, pvcId)
+			}
+		}
+
 		if pvc.Status.Phase == core.ClaimBound {
 			task.Phase = Completed
 			task.Reason = TransferCompleted
@@ -1701,7 +1715,7 @@ func terminationMessage(pod *core.Pod) (msg string, ok bool) {
 }
 
 // Return whether the pod has failed and restarted too many times.
-func RestartLimitExceeded(pod *core.Pod) (exceeded bool) {
+func restartLimitExceeded(pod *core.Pod) (exceeded bool) {
 	if len(pod.Status.ContainerStatuses) == 0 {
 		return
 	}
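
The new check in updatePopulatorCopyProgress matches populator pods to their PVC by the "populate-<pvc-uid>" part of the pod name. A self-contained sketch of that matching, assuming the naming convention shown in the diff above (the helper and the sample data are illustrative):

package main

import (
	"fmt"
	"strings"

	core "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
)

// failedPopulatorPod returns the failed populator pod that belongs to the PVC
// with the given UID, matching on the "populate-<pvc-uid>" name convention.
func failedPopulatorPod(pods []core.Pod, pvcUID types.UID) (*core.Pod, bool) {
	for i := range pods {
		parts := strings.SplitN(pods[i].Name, "populate-", 2)
		if len(parts) != 2 || parts[1] != string(pvcUID) {
			continue
		}
		if pods[i].Status.Phase == core.PodFailed {
			return &pods[i], true
		}
	}
	return nil, false
}

func main() {
	// Illustrative pod and PVC UID only.
	pod := core.Pod{}
	pod.Name = "populate-1234-abcd"
	pod.Status.Phase = core.PodFailed
	if p, failed := failedPopulatorPod([]core.Pod{pod}, types.UID("1234-abcd")); failed {
		fmt.Printf("populator pod %s failed\n", p.Name)
	}
}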
8 changes: 0 additions & 8 deletions pkg/lib-volume-populator/populator-machinery/controller.go
@@ -31,7 +31,6 @@ import (
 	"time"
 
 	"github.com/konveyor/forklift-controller/pkg/apis/forklift/v1beta1"
-	"github.com/konveyor/forklift-controller/pkg/controller/plan"
 	corev1 "k8s.io/api/core/v1"
 	storagev1 "k8s.io/api/storage/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
@@ -698,13 +697,6 @@ func (c *controller) syncPvc(ctx context.Context, key, pvcNamespace, pvcName str
 	if corev1.PodSucceeded != pod.Status.Phase {
 		if corev1.PodFailed == pod.Status.Phase {
 			c.recorder.Eventf(pvc, corev1.EventTypeWarning, reasonPodFailed, "Populator failed: %s", pod.Status.Message)
-			// Delete failed pods so we can try again
-			if !plan.RestartLimitExceeded(pod) {
-				err = c.kubeClient.CoreV1().Pods(populatorNamespace).Delete(ctx, pod.Name, metav1.DeleteOptions{})
-				if err != nil {
-					return err
-				}
-			}
 		}
 		// We'll get called again later when the pod succeeds
 		return nil
