From 06ed7ca69ac4480049975e595f7ad132a37d6d67 Mon Sep 17 00:00:00 2001 From: Jean-Louis Dupond Date: Thu, 8 Jun 2023 11:47:57 +0200 Subject: [PATCH] Check on-disk bitmap status on StartVmBackup and on TransferDiskImage Sometimes a bitmap can become invalid/corrupt without oVirt noticing this. For example if a hypervisor crashes, the active bitmap will become invalid. This means that the qcow2 volume does not contain the bitmap anymore, but oVirt thinks the bitmap still exists because it's in the database. This will currently cause oVirt to fail to create a NBDServer, as it will error with 'Bitmap does not exist in ...'. We use the QemuImageInfo to get all the on-disk bitmaps and compare them with the bitmaps in the oVirt database. If there is some inconsistency we remove all the bitmaps/checkpoints. Signed-off-by: Jean-Louis Dupond --- .../storage/backup/StartVmBackupCommand.java | 63 ++++++++++++++++++- .../disk/image/TransferDiskImageCommand.java | 43 +++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/backup/StartVmBackupCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/backup/StartVmBackupCommand.java index 60fc7fdfd2f..2f4a70640d7 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/backup/StartVmBackupCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/backup/StartVmBackupCommand.java @@ -39,8 +39,10 @@ import org.ovirt.engine.core.common.FeatureSupported; import org.ovirt.engine.core.common.VdcObjectType; import org.ovirt.engine.core.common.action.ActionParametersBase; +import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure; import org.ovirt.engine.core.common.action.ActionReturnValue; import org.ovirt.engine.core.common.action.ActionType; +import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters; 
import org.ovirt.engine.core.common.action.LockProperties; import org.ovirt.engine.core.common.action.VmBackupParameters; import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters; @@ -58,6 +60,8 @@ import org.ovirt.engine.core.common.businessentities.storage.DiskBackupMode; import org.ovirt.engine.core.common.businessentities.storage.DiskImage; import org.ovirt.engine.core.common.businessentities.storage.ImageStatus; +import org.ovirt.engine.core.common.businessentities.storage.Qcow2BitmapInfo; +import org.ovirt.engine.core.common.businessentities.storage.QemuImageInfo; import org.ovirt.engine.core.common.businessentities.storage.VmBackupType; import org.ovirt.engine.core.common.errors.EngineException; import org.ovirt.engine.core.common.errors.EngineMessage; @@ -264,6 +268,24 @@ protected void executeCommand() { Guid vmBackupId = createVmBackup(); log.info("Created VmBackup entity '{}'", vmBackupId); + // Set a VDS to be able to gather Qemu Image Info + if (getVds() == null) { + setHostForColdBackupOperation(); + } + if (getVds().isQemuImageInfoBitmaps()) { + log.info("Checking VM checkpoint '{}' for VM '{}'", vmBackup.getFromCheckpointId(), vmBackup.getVmId()); + if (!validateCheckpoint(vmBackup.getFromCheckpointId())) { + addCustomValue("backupId", vmBackupId.toString()); + auditLogDirector.log(this, AuditLogType.VM_INCREMENTAL_BACKUP_FAILED_FULL_VM_BACKUP_NEEDED); + setCommandStatus(CommandStatus.FAILED); + return; + } + log.info("Previous VM checkpoint '{}' for VM '{}' is valid", vmBackup.getFromCheckpointId(), vmBackup.getVmId()); + } else { + log.info("Could not check VM checkpoint '{}' for VM '{}' due to missing bitmap info support in vdsm", + vmBackup.getFromCheckpointId(), vmBackup.getVmId()); + } + if (isLiveBackup()) { log.info("Redefine previous VM checkpoints for VM '{}'", vmBackup.getVmId()); if (!redefineVmCheckpoints()) { @@ -368,6 +390,45 @@ public boolean performNextOperation(int completedChildCount) { return true; } + private 
boolean validateCheckpoint(Guid checkpointId) { + List<DiskImage> images = vmCheckpointDao.getDisksByCheckpointId(checkpointId); + /* Every disk of the checkpoint must still have a bitmap named after the checkpoint id on disk */ + for (DiskImage image : images) { + QemuImageInfo qcow2Info = imagesHandler.getQemuImageInfoFromVdsm( + getStoragePoolId(), + image.getStorageIds().get(0), + image.getId(), + image.getImageId(), + getParameters().getVdsRunningOn(), + !isLiveBackup()); + + boolean valid = false; + if (qcow2Info != null) { + List<Qcow2BitmapInfo> bitmaps = qcow2Info.getQcow2bitmaps(); + if (bitmaps != null) { + valid = bitmaps.stream().anyMatch(bitmap -> bitmap.getName().equals(String.valueOf(checkpointId))); + } + } + /* No matching bitmap (name compared as String; NOTE(review): confirm getName() returns String) */ + if (!valid) { + log.error("Checkpoint '{}' does not exist for disk '{}'. Removing checkpoints", + checkpointId, + image.getId()); + /* Checkpoint is unusable: force-delete all checkpoints for this disk and fail the validation */ + DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters = + new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image)); + deleteAllVmCheckpointsParameters.setParentCommand(getActionType()); + deleteAllVmCheckpointsParameters.setParentParameters(getParameters()); + deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED); + deleteAllVmCheckpointsParameters.setForce(true); + + runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters); + return false; + } + } + return true; + } + private boolean redefineVmCheckpoints() { VmBackupParameters parameters = new VmBackupParameters(getParameters().getVmBackup()); parameters.setParentCommand(getActionType()); @@ -380,8 +441,6 @@ private boolean redefineVmCheckpoints() { } private boolean startAddBitmapJobs() { - setHostForColdBackupOperation(); - VmBackup vmBackup = getParameters().getVmBackup(); if (getParameters().getVdsRunningOn() == null) { log.error("Failed to find host to run cold backup operation for VM '{}'", vmBackup.getVmId()); diff --git
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/disk/image/TransferDiskImageCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/disk/image/TransferDiskImageCommand.java index 1c6c42f0795..83812db8200 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/disk/image/TransferDiskImageCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/disk/image/TransferDiskImageCommand.java @@ -32,9 +32,11 @@ import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.VdcObjectType; import org.ovirt.engine.core.common.action.ActionParametersBase; +import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure; import org.ovirt.engine.core.common.action.ActionReturnValue; import org.ovirt.engine.core.common.action.ActionType; import org.ovirt.engine.core.common.action.AddDiskParameters; +import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters; import org.ovirt.engine.core.common.action.LockProperties; import org.ovirt.engine.core.common.action.RemoveDiskParameters; import org.ovirt.engine.core.common.action.TransferDiskImageParameters; @@ -54,6 +56,8 @@ import org.ovirt.engine.core.common.businessentities.storage.ImageTransfer; import org.ovirt.engine.core.common.businessentities.storage.ImageTransferBackend; import org.ovirt.engine.core.common.businessentities.storage.ImageTransferPhase; +import org.ovirt.engine.core.common.businessentities.storage.Qcow2BitmapInfo; +import org.ovirt.engine.core.common.businessentities.storage.QemuImageInfo; import org.ovirt.engine.core.common.businessentities.storage.TimeoutPolicyType; import org.ovirt.engine.core.common.businessentities.storage.TransferType; import org.ovirt.engine.core.common.businessentities.storage.VmBackupType; @@ -259,7 +263,46 @@ private PrepareImageVDSCommandParameters getPrepareParameters(Guid vdsId) { 
getDiskImage().getImageId(), true); } + private boolean validateBitmap(DiskImage image, Guid checkpointId) { + QemuImageInfo qcow2Info = imagesHandler.getQemuImageInfoFromVdsm( + getStoragePoolId(), + image.getStorageIds().get(0), + image.getId(), + image.getImageId(), + getVdsId(), + !isLiveBackup()); + + boolean valid = false; + if (qcow2Info != null) { + List<Qcow2BitmapInfo> bitmaps = qcow2Info.getQcow2bitmaps(); + if (bitmaps != null) { + valid = bitmaps.stream().anyMatch(bitmap -> bitmap.getName().equals(String.valueOf(checkpointId))); + } + } + /* No on-disk bitmap named after the checkpoint id (NOTE(review): confirm getName() returns String) */ + if (!valid) { + log.error("Checkpoint '{}' does not exist for disk '{}'. Removing checkpoints", + checkpointId, + image.getId()); + /* Checkpoint is unusable: force-delete all checkpoints for this disk and report the bitmap as invalid */ + DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters = + new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image)); + deleteAllVmCheckpointsParameters.setParentCommand(getActionType()); + deleteAllVmCheckpointsParameters.setParentParameters(getParameters()); + deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED); + deleteAllVmCheckpointsParameters.setForce(true); + + runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters); + return false; + } + return true; + } + private Guid getBitmap() { + if (!validateBitmap(getDiskImage(), getBackup().getFromCheckpointId())) { + return null; + } + if (isHybridBackup() && getDiskImage().getBackupMode() == DiskBackupMode.Incremental) { return getBackup().getFromCheckpointId(); }