From c7caf192c48573df3a47307b54d72c3b96e38182 Mon Sep 17 00:00:00 2001
From: Benny Zlotnik
Date: Wed, 13 Sep 2023 15:59:43 +0300
Subject: [PATCH 1/4] openstack: wait for volume to be removed

Before removing the snapshot, attempt to make sure the volume has been
removed.

Signed-off-by: Benny Zlotnik
---
 .../plan/adapter/openstack/BUILD.bazel |  1 +
 .../plan/adapter/openstack/client.go   | 53 ++++++++++++++++---
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/pkg/controller/plan/adapter/openstack/BUILD.bazel b/pkg/controller/plan/adapter/openstack/BUILD.bazel
index ea9a8b50b..4858b5486 100644
--- a/pkg/controller/plan/adapter/openstack/BUILD.bazel
+++ b/pkg/controller/plan/adapter/openstack/BUILD.bazel
@@ -32,6 +32,7 @@ go_library(
         "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:meta",
         "//vendor/k8s.io/apimachinery/pkg/labels",
         "//vendor/k8s.io/apimachinery/pkg/types",
+        "//vendor/k8s.io/apimachinery/pkg/util/wait",
         "//vendor/kubevirt.io/api/core/v1:core",
         "//vendor/kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1",
         "//vendor/sigs.k8s.io/controller-runtime/pkg/client",
diff --git a/pkg/controller/plan/adapter/openstack/client.go b/pkg/controller/plan/adapter/openstack/client.go
index 80e52f0bf..7e72ac171 100644
--- a/pkg/controller/plan/adapter/openstack/client.go
+++ b/pkg/controller/plan/adapter/openstack/client.go
@@ -3,6 +3,7 @@ package openstack
 import (
 	"errors"
 	"strings"
+	"time"
 
 	planapi "github.com/konveyor/forklift-controller/pkg/apis/forklift/v1beta1/plan"
 	"github.com/konveyor/forklift-controller/pkg/apis/forklift/v1beta1/ref"
@@ -10,6 +11,7 @@ import (
 	model "github.com/konveyor/forklift-controller/pkg/controller/provider/web/openstack"
 	libclient "github.com/konveyor/forklift-controller/pkg/lib/client/openstack"
 	liberr "github.com/konveyor/forklift-controller/pkg/lib/error"
+	"k8s.io/apimachinery/pkg/util/wait"
 	cdi "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
 )
@@ -272,12 +274,7 @@ func (r *Client) PreTransferActions(vmRef ref.Ref) (ready bool, err error) {
 			r.Log.Info("the image properties are in sync, cleaning the image",
 				"vm", vm.Name, "image", inventoryImage.Name, "properties", inventoryImage.Properties)
 			originalVolumeID := inventoryImage.Properties[forkliftPropertyOriginalVolumeID].(string)
-			err = r.cleanup(vm, originalVolumeID)
-			if err != nil {
-				r.Log.Error(err, "cleaning the image",
-					"vm", vm.Name, "image", image.Name)
-				return
-			}
+			go r.cleanup(vm, originalVolumeID)
 		default:
 			err = liberr.New("unexpected image status")
 			r.Log.Error(err, "checking the image from volume",
@@ -546,6 +543,50 @@ func (r *Client) cleanup(vm *libclient.VM, originalVolumeID string) (err error)
 		err = nil
 		return
 	}
+
+	// Now we need to wait for the volume to be removed
+	condition := func() (done bool, err error) {
+		volume, err := r.getVolumeFromSnapshot(vm, snapshot.ID)
+		if err != nil {
+			if errors.Is(err, ResourceNotFoundError) {
+				done = true
+				err = nil
+				return
+			}
+			err = liberr.Wrap(err)
+			r.Log.Error(err, "retrieving volume from snapshot information when cleaning up",
+				"vm", vm.Name, "snapshotID", snapshot.ID)
+			return
+		}
+		switch volume.Status {
+		case VolumeStatusDeleting:
+			r.Log.Info("the volume is still being deleted, waiting...",
+				"vm", vm.Name, "volume", volume.Name, "snapshot", volume.SnapshotID)
+		default:
+			err = UnexpectedVolumeStatusError
+			r.Log.Error(err, "checking the volume",
+				"vm", vm.Name, "volume", volume.Name, "status", volume.Status)
+			return
+		}
+		return
+	}
+
+	// TODO add config
+	backoff := wait.Backoff{
+		Duration: 3 * time.Second,
+		Factor:   1.1,
+		Steps:    100,
+	}
+
+	err = wait.ExponentialBackoff(backoff, condition)
+	if err != nil {
+		err = liberr.Wrap(err)
+		r.Log.Error(err, "waiting for the volume to be removed",
+			"vm", vm.Name, "snapshotID", snapshot.ID)
+		err = nil
+		return
+	}
+
 	r.Log.Info("cleaning up the snapshot from volume",
 		"vm", vm.Name, "originalVolumeID", originalVolumeID)
 	err = r.removeSnapshotFromVolume(vm, originalVolumeID)
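For readers unfamiliar with the apimachinery helper this patch leans on: wait.ExponentialBackoff runs a ConditionFunc repeatedly, sleeping between attempts; the first sleep is Backoff.Duration, each subsequent sleep is multiplied by Backoff.Factor, and the loop gives up after Backoff.Steps attempts. A minimal, self-contained sketch of the polling pattern (the condition below is a stand-in, not forklift's volume lookup):

    package main

    import (
    	"fmt"
    	"time"

    	"k8s.io/apimachinery/pkg/util/wait"
    )

    func main() {
    	attempts := 0
    	// (false, nil) means "not done yet, retry after the next backoff
    	// sleep"; (true, nil) stops the loop; a non-nil error aborts it.
    	condition := func() (bool, error) {
    		attempts++
    		return attempts >= 3, nil // pretend the volume vanishes on try 3
    	}

    	backoff := wait.Backoff{
    		Duration: 100 * time.Millisecond, // first sleep
    		Factor:   1.1,                    // each sleep is 1.1x the previous
    		Steps:    100,                    // give up after 100 attempts
    	}
    	if err := wait.ExponentialBackoff(backoff, condition); err != nil {
    		fmt.Println("gave up:", err) // wait.ErrWaitTimeout when steps run out
    		return
    	}
    	fmt.Println("volume is gone")
    }

With a 3-second initial sleep, factor 1.1 and 100 steps, the worst case adds up to a very long wait, which is presumably why this same patch fires cleanup from a goroutine instead of blocking PreTransferActions.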
From 0e2fb7fc16f4f99b30ef86c026866d776bec7df4 Mon Sep 17 00:00:00 2001
From: Benny Zlotnik
Date: Wed, 20 Sep 2023 14:28:00 +0300
Subject: [PATCH 2/4] openstack: add log

Signed-off-by: Benny Zlotnik
---
 pkg/controller/plan/adapter/openstack/client.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/controller/plan/adapter/openstack/client.go b/pkg/controller/plan/adapter/openstack/client.go
index 7e72ac171..18a6775a0 100644
--- a/pkg/controller/plan/adapter/openstack/client.go
+++ b/pkg/controller/plan/adapter/openstack/client.go
@@ -549,6 +549,7 @@ func (r *Client) cleanup(vm *libclient.VM, originalVolumeID string) (err error)
 		volume, err := r.getVolumeFromSnapshot(vm, snapshot.ID)
 		if err != nil {
 			if errors.Is(err, ResourceNotFoundError) {
+				r.Log.Info("volume not found, we are done")
 				done = true
 				err = nil
 				return
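The branch being instrumented is Go's sentinel-error idiom: getVolumeFromSnapshot is expected to surface a well-known error value that callers match with errors.Is, even through wrapping. Reduced to a standalone sketch (ErrNotFound is a stand-in for forklift's ResourceNotFoundError):

    package main

    import (
    	"errors"
    	"fmt"
    )

    // Sentinel value; callers compare identity, not message text.
    var ErrNotFound = errors.New("resource not found")

    func lookupVolume(id string) error {
    	// Wrapping with %w keeps the sentinel discoverable via errors.Is.
    	return fmt.Errorf("volume %q: %w", id, ErrNotFound)
    }

    func main() {
    	if err := lookupVolume("vol-1"); errors.Is(err, ErrNotFound) {
    		fmt.Println("volume gone, treating deletion as complete")
    	}
    }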
From 39aadf611bd68e35b853a89672c043a51c2c6e0d Mon Sep 17 00:00:00 2001
From: Benny Zlotnik
Date: Tue, 19 Dec 2023 15:52:53 +0200
Subject: [PATCH 3/4] fix linter complaint

Signed-off-by: Benny Zlotnik
---
 pkg/controller/plan/adapter/openstack/client.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pkg/controller/plan/adapter/openstack/client.go b/pkg/controller/plan/adapter/openstack/client.go
index 18a6775a0..8a4e0d0f7 100644
--- a/pkg/controller/plan/adapter/openstack/client.go
+++ b/pkg/controller/plan/adapter/openstack/client.go
@@ -274,7 +274,13 @@ func (r *Client) PreTransferActions(vmRef ref.Ref) (ready bool, err error) {
 			r.Log.Info("the image properties are in sync, cleaning the image",
 				"vm", vm.Name, "image", inventoryImage.Name, "properties", inventoryImage.Properties)
 			originalVolumeID := inventoryImage.Properties[forkliftPropertyOriginalVolumeID].(string)
-			go r.cleanup(vm, originalVolumeID)
+			go func() {
+				err := r.cleanup(vm, originalVolumeID)
+				if err != nil {
+					r.Log.Error(err, "failed to cleanup")
+				}
+			}()
+
 		default:
 			err = liberr.New("unexpected image status")
 			r.Log.Error(err, "checking the image from volume",
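The linter complaint is the unchecked return value in the bare `go r.cleanup(...)` statement from patch 1: a goroutine launched that way silently discards the method's error. The fix above, reduced to a runnable sketch (cleanup stands in for the real method):

    package main

    import (
    	"fmt"
    	"time"
    )

    func cleanup() error { return fmt.Errorf("volume still attached") }

    func main() {
    	// "go cleanup()" would drop the error on the floor; wrapping the
    	// call in a closure gives the result a handler while staying async.
    	go func() {
    		if err := cleanup(); err != nil {
    			fmt.Println("failed to cleanup:", err)
    		}
    	}()
    	time.Sleep(50 * time.Millisecond) // crude wait so the goroutine runs
    }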
From ecfa9f0a23d8215a9673b1376045169617cb9131 Mon Sep 17 00:00:00 2001
From: Benny Zlotnik
Date: Mon, 25 Dec 2023 11:58:29 +0200
Subject: [PATCH 4/4] openstack: add config for retry attempts

Signed-off-by: Benny Zlotnik
---
 operator/config/manager/manager.yaml                   | 2 ++
 operator/roles/forkliftcontroller/defaults/main.yml    | 1 +
 .../templates/controller/deployment-controller.yml.j2  | 4 ++++
 pkg/controller/plan/adapter/openstack/BUILD.bazel      | 1 +
 pkg/controller/plan/adapter/openstack/client.go        | 9 ++++-----
 pkg/settings/migration.go                              | 6 ++++++
 6 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/operator/config/manager/manager.yaml b/operator/config/manager/manager.yaml
index d218e6335..182fa586f 100644
--- a/operator/config/manager/manager.yaml
+++ b/operator/config/manager/manager.yaml
@@ -62,6 +62,8 @@ spec:
           value: ${SNAPSHOT_REMOVAL_TIMEOUT}
         - name: SNAPSHOT_STATUS_CHECK_RATE
           value: ${SNAPSHOT_STATUS_CHECK_RATE}
+        - name: CLEANUP_RETRIES
+          value: ${CLEANUP_RETRIES}
         - name: CDI_EXPORT_TOKEN_TTL
           value: ${CDI_EXPORT_TOKEN_TTL}
         - name: FILESYSTEM_OVERHEAD
diff --git a/operator/roles/forkliftcontroller/defaults/main.yml b/operator/roles/forkliftcontroller/defaults/main.yml
index 865cb821b..e99e67ed7 100644
--- a/operator/roles/forkliftcontroller/defaults/main.yml
+++ b/operator/roles/forkliftcontroller/defaults/main.yml
@@ -31,6 +31,7 @@ controller_log_level: 3
 controller_precopy_interval: 60
 controller_snapshot_removal_timeout_minuts: 120
 controller_snapshot_status_check_rate_seconds: 10
+controller_cleanup_retries: 10
 controller_vsphere_incremental_backup: true
 controller_ovirt_warm_migration: true
 controller_max_vm_inflight: 20
diff --git a/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2 b/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2
index f34e969b3..2f04178ba 100644
--- a/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2
+++ b/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2
@@ -68,6 +68,10 @@ spec:
         - name: SNAPSHOT_STATUS_CHECK_RATE_SECONDS
           value: "{{ controller_snapshot_status_check_rate_seconds }}"
 {% endif %}
+{% if controller_cleanup_retries is number %}
+        - name: CLEANUP_RETRIES
+          value: "{{ controller_cleanup_retries }}"
+{% endif %}
 {% if controller_max_vm_inflight is number %}
         - name: MAX_VM_INFLIGHT
           value: "{{ controller_max_vm_inflight }}"
diff --git a/pkg/controller/plan/adapter/openstack/BUILD.bazel b/pkg/controller/plan/adapter/openstack/BUILD.bazel
index 4858b5486..8c94d5f65 100644
--- a/pkg/controller/plan/adapter/openstack/BUILD.bazel
+++ b/pkg/controller/plan/adapter/openstack/BUILD.bazel
@@ -26,6 +26,7 @@ go_library(
         "//pkg/lib/client/openstack",
         "//pkg/lib/error",
         "//pkg/lib/itinerary",
+        "//pkg/settings",
         "//vendor/k8s.io/api/core/v1:core",
         "//vendor/k8s.io/apimachinery/pkg/api/errors",
         "//vendor/k8s.io/apimachinery/pkg/api/resource",
diff --git a/pkg/controller/plan/adapter/openstack/client.go b/pkg/controller/plan/adapter/openstack/client.go
index 8a4e0d0f7..2c4ccdbfb 100644
--- a/pkg/controller/plan/adapter/openstack/client.go
+++ b/pkg/controller/plan/adapter/openstack/client.go
@@ -11,6 +11,7 @@ import (
 	model "github.com/konveyor/forklift-controller/pkg/controller/provider/web/openstack"
 	libclient "github.com/konveyor/forklift-controller/pkg/lib/client/openstack"
 	liberr "github.com/konveyor/forklift-controller/pkg/lib/error"
+	"github.com/konveyor/forklift-controller/pkg/settings"
 	"k8s.io/apimachinery/pkg/util/wait"
 	cdi "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"
 )
@@ -555,7 +556,7 @@ func (r *Client) cleanup(vm *libclient.VM, originalVolumeID string) (err error)
 		volume, err := r.getVolumeFromSnapshot(vm, snapshot.ID)
 		if err != nil {
 			if errors.Is(err, ResourceNotFoundError) {
-				r.Log.Info("volume not found, we are done")
+				r.Log.Info("volume doesn't exist, assuming we are done")
 				done = true
 				err = nil
 				return
@@ -578,11 +579,10 @@ func (r *Client) cleanup(vm *libclient.VM, originalVolumeID string) (err error)
 		return
 	}
 
-	// TODO add config
 	backoff := wait.Backoff{
 		Duration: 3 * time.Second,
-		Factor:   1.1,
-		Steps:    100,
+		Factor:   1.5,
+		Steps:    settings.Settings.CleanupRetries,
 	}
 
 	err = wait.ExponentialBackoff(backoff, condition)
@@ -590,7 +590,6 @@ func (r *Client) cleanup(vm *libclient.VM, originalVolumeID string) (err error)
 		err = liberr.Wrap(err)
 		r.Log.Error(err, "waiting for the volume to be removed",
 			"vm", vm.Name, "snapshotID", snapshot.ID)
-		err = nil
 		return
 	}
 
diff --git a/pkg/settings/migration.go b/pkg/settings/migration.go
index 419eccd42..30b733464 100644
--- a/pkg/settings/migration.go
+++ b/pkg/settings/migration.go
@@ -22,6 +22,7 @@ const (
 	CDIExportTokenTTL       = "CDI_EXPORT_TOKEN_TTL"
 	FileSystemOverhead      = "FILESYSTEM_OVERHEAD"
 	BlockOverhead           = "BLOCK_OVERHEAD"
+	CleanupRetries          = "CLEANUP_RETRIES"
 )
 
 // Migration settings
@@ -49,6 +50,8 @@ type Migration struct {
 	FileSystemOverhead int
 	// Block fixed overhead size
 	BlockOverhead int64
+	// Cleanup retries
+	CleanupRetries int
 }
 
 // Load settings.
@@ -71,6 +74,9 @@ func (r *Migration) Load() (err error) {
 	if r.SnapshotStatusCheckRate, err = getPositiveEnvLimit(SnapshotStatusCheckRate, 10); err != nil {
 		return liberr.Wrap(err)
 	}
+	if r.CleanupRetries, err = getPositiveEnvLimit(CleanupRetries, 10); err != nil {
+		return liberr.Wrap(err)
+	}
 	if virtV2vImage, ok := os.LookupEnv(VirtV2vImage); ok {
 		if cold, warm, found := strings.Cut(virtV2vImage, "|"); found {
 			r.VirtV2vImageCold = cold
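The series does not include getPositiveEnvLimit itself; a helper of roughly this shape (a hypothetical reconstruction, not the forklift implementation) would satisfy the call site added to Load(), returning the default when the variable is unset and rejecting values that don't parse:

    package main

    import (
    	"fmt"
    	"os"
    	"strconv"
    )

    // Hypothetical stand-in for forklift's getPositiveEnvLimit helper.
    func getPositiveEnvLimit(name string, def int) (int, error) {
    	s, ok := os.LookupEnv(name)
    	if !ok || s == "" {
    		return def, nil // unset: fall back to the default
    	}
    	n, err := strconv.Atoi(s)
    	if err != nil || n < 0 {
    		return 0, fmt.Errorf("%s must be a non-negative integer, got %q", name, s)
    	}
    	return n, nil
    }

    func main() {
    	os.Setenv("CLEANUP_RETRIES", "25")
    	retries, _ := getPositiveEnvLimit("CLEANUP_RETRIES", 10)
    	fmt.Println("cleanup retries:", retries) // 25; 10 when unset
    }

End to end, the knob flows from the operator's controller_cleanup_retries default, through the CLEANUP_RETRIES env var in the deployment template, into settings.Settings.CleanupRetries, which this patch wires into the backoff's Steps.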