Skip to content

Commit

Permalink
MTV-1543 | Fix warm migration scheduler
Browse files Browse the repository at this point in the history
Issue:
When running a warm migration with more VMs than the MaxInFlight disk
limit allows, the VMs over this limit won't start migrating until the
cutover. Once the cutover starts, the VMs that had not been started
migrate the same way as in the old migration, so the downtime is larger.

Fix:
Ignore the cost when a VM is in `CopyingPaused`; this allows the other
migrations to start, because the cost is reduced once the VM disk
transfer is finished.

Note:
This patch also improves the cold migration, as it ignores the count when
creating the VM so the other VM migrations can get started.
It also dramatically improves the warm migration time: we no longer wait
for the guest conversion, because we already have the disk, so we start
the guest conversion and do not halt the scheduler.

Fixes: https://issues.redhat.com/browse/MTV-1543

Signed-off-by: Martin Necas <[email protected]>
  • Loading branch information
mnecas committed Oct 7, 2024
1 parent c2f7d64 commit b60e467
Showing 1 changed file with 39 additions and 10 deletions.
49 changes: 39 additions & 10 deletions pkg/controller/plan/scheduler/vsphere/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ import (
liberr "github.com/konveyor/forklift-controller/pkg/lib/error"
)

// Phases.
//
// Names of migration phases that the scheduler's cost method compares
// against vmStatus.Phase when deciding what a VM costs.
const (
	// CopyingPaused is listed as a zero-cost phase in the non-cold-local branch of cost.
	CopyingPaused = "CopyingPaused"
	// CreateGuestConversionPod is listed as a zero-cost phase in the non-cold-local branch of cost.
	CreateGuestConversionPod = "CreateGuestConversionPod"
	// ConvertGuest is listed as a zero-cost phase in the non-cold-local branch of cost.
	ConvertGuest = "ConvertGuest"
	// CreateVM is listed as a zero-cost phase in both branches of cost.
	CreateVM = "CreateVM"
	// PostHook is listed as a zero-cost phase in both branches of cost.
	PostHook = "PostHook"
	// Completed is listed as a zero-cost phase in both branches of cost.
	Completed = "Completed"
)

// Steps.
//
// DiskTransfer is the name of the disk transfer pipeline step.
// NOTE(review): no use of this constant is visible in this hunk — confirm
// where it is consumed before relying on it.
const DiskTransfer = "DiskTransfer"

// Package level mutex to ensure that
// multiple concurrent reconciles don't
// attempt to schedule VMs into the same
Expand Down Expand Up @@ -107,7 +122,7 @@ func (r *Scheduler) buildInFlight() (err error) {
return
}
if vmStatus.Running() {
r.inFlight[vm.Host] += r.cost(vm)
r.inFlight[vm.Host] += r.cost(vm, vmStatus)
}
}

Expand Down Expand Up @@ -153,7 +168,7 @@ func (r *Scheduler) buildInFlight() (err error) {
}
return err
}
r.inFlight[vm.Host] += r.cost(vm)
r.inFlight[vm.Host] += r.cost(vm, vmStatus)
}
}

Expand All @@ -170,25 +185,39 @@ func (r *Scheduler) buildPending() (err error) {
if err != nil {
return
}

if !vmStatus.MarkedStarted() && !vmStatus.MarkedCompleted() {
pending := &pendingVM{
status: vmStatus,
cost: r.cost(vm),
cost: r.cost(vm, vmStatus),
}
r.pending[vm.Host] = append(r.pending[vm.Host], pending)
}
}
return
}

func (r *Scheduler) cost(vm *model.VM) int {
if coldLocal, _ := r.Plan.VSphereColdLocal(); coldLocal {
/// virt-v2v transfers one disk at a time
return 1
// cost returns the scheduling cost charged for vm, given its current
// migration status. Callers in this file add the result to the per-host
// in-flight total (buildInFlight) or record it on a pendingVM (buildPending).
//
// The first result of r.Plan.VSphereColdLocal() selects the branch; its
// second result is discarded here.
//
//   - Cold-local: 0 when vmStatus.Phase is CreateVM, PostHook or Completed,
//     otherwise 1.
//   - Otherwise: intended to be 0 when vmStatus.Phase is CopyingPaused,
//     CreateVM, PostHook, Completed, ConvertGuest or CreateGuestConversionPod,
//     and len(vm.Disks) for every other phase.
//
// vmStatus is dereferenced unconditionally, so it must be non-nil.
func (r *Scheduler) cost(vm *model.VM, vmStatus *plan.VMStatus) int {
coldLocal, _ := r.Plan.VSphereColdLocal()
if coldLocal {
switch vmStatus.Phase {
case CreateVM, PostHook, Completed:
// In these phases the disk has already been transferred and only the VM is left to create.
// Setting the cost to 0 lets other VMs start migrating.
return 0
default:
// Any other phase costs a single unit, regardless of how many disks the VM has.
return 1
}
} else {
// NOTE(review): the comment and return directly below look like the
// pre-change lines of this diff hunk, rendered without their '-' marker.
// Taken literally they return before the switch that follows is reached —
// confirm against the committed file.
// CDI transfers the disks in parallel by different pods
return len(vm.Disks)
switch vmStatus.Phase {
case CopyingPaused, CreateVM, PostHook, Completed, ConvertGuest, CreateGuestConversionPod:
// In warm/remote migrations these phases operate on already transferred disks,
// so other VM migrations can be started at this point.
// Setting the cost to 0 lets other VMs start migrating.
return 0
default:
// CDI transfers the disks in parallel using separate pods, so each disk counts.
return len(vm.Disks)
}
}
}

Expand Down

0 comments on commit b60e467

Please sign in to comment.