Skip to content

Commit

Permalink
Check on-disk bitmap status on StartVmBackup and on TransferDiskImage
Browse files Browse the repository at this point in the history
Sometimes a bitmap can become invalid or corrupt without oVirt noticing.
For example, if a hypervisor crashes, the active bitmap becomes invalid.
This means that the qcow2 volume no longer contains the bitmap,
but oVirt still thinks the bitmap exists because it is recorded in the
database.

This currently causes oVirt to fail to create an NBD server, because the
operation errors out with 'Bitmap does not exist in ...'.

We use ListVolumeBitmaps to get all the on-disk bitmaps and compare
them with the bitmaps in the oVirt database. If there is an
inconsistency, we remove all the bitmaps/checkpoints.

Signed-off-by: Jean-Louis Dupond <[email protected]>
  • Loading branch information
dupondje committed Jun 8, 2023
1 parent dc41832 commit 7b8763f
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import static org.ovirt.engine.core.bll.storage.disk.image.DisksFilter.ONLY_NOT_SHAREABLE;
import static org.ovirt.engine.core.bll.storage.disk.image.DisksFilter.ONLY_SNAPABLE;

import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
Expand Down Expand Up @@ -39,8 +40,10 @@
import org.ovirt.engine.core.common.FeatureSupported;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.VmBackupParameters;
import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters;
Expand Down Expand Up @@ -80,6 +83,7 @@
import org.ovirt.engine.core.utils.ReplacementUtils;
import org.ovirt.engine.core.utils.lock.EngineLock;
import org.ovirt.engine.core.utils.transaction.TransactionSupport;
import org.ovirt.engine.core.vdsbroker.irsbroker.UUIDListReturn;
import org.ovirt.engine.core.vdsbroker.irsbroker.VmBackupInfo;
import org.ovirt.engine.core.vdsbroker.vdsbroker.PrepareImageReturn;

Expand Down Expand Up @@ -264,6 +268,15 @@ protected void executeCommand() {
Guid vmBackupId = createVmBackup();
log.info("Created VmBackup entity '{}'", vmBackupId);

log.info("Checking VM checkpoint '{}' for VM '{}'", vmBackup.getFromCheckpointId(), vmBackup.getVmId());
if (!validateCheckpoint(vmBackup.getFromCheckpointId())) {
addCustomValue("backupId", vmBackupId.toString());
auditLogDirector.log(this, AuditLogType.VM_INCREMENTAL_BACKUP_FAILED_FULL_VM_BACKUP_NEEDED);
setCommandStatus(CommandStatus.FAILED);
return;
}
log.info("Previous VM checkpoint '{}' for VM '{}' is valid", vmBackup.getFromCheckpointId(), vmBackup.getVmId());

if (isLiveBackup()) {
log.info("Redefine previous VM checkpoints for VM '{}'", vmBackup.getVmId());
if (!redefineVmCheckpoints()) {
Expand Down Expand Up @@ -368,6 +381,52 @@ public boolean performNextOperation(int completedChildCount) {
return true;
}

/**
 * Checks that the bitmap named after the given checkpoint is still present on
 * every disk returned by {@code vmCheckpointDao.getDisksByCheckpointId}.
 *
 * For each disk the on-disk bitmaps are listed via the ListVolumeBitmaps
 * internal action. If the listing fails, returns no data, or does not contain
 * the checkpoint id, a forced DeleteAllVmCheckpoints is issued for that disk
 * and validation stops immediately; remaining disks are not inspected.
 *
 * @param checkpointId
 *            the checkpoint to look for; may be {@code null}, in which case
 *            there is nothing to validate (presumably a backup without a
 *            from-checkpoint - confirm against callers)
 * @return {@code true} if there is nothing to validate or every disk still
 *         holds the bitmap, {@code false} as soon as one disk fails the check
 */
private boolean validateCheckpoint(Guid checkpointId) {
    if (checkpointId == null) {
        // The caller passes getFromCheckpointId() unconditionally. Without this
        // guard, checkpointId.toString() below throws a NullPointerException.
        return true;
    }

    // Converted once; the value is the same for every disk in the loop.
    String bitmapName = checkpointId.toString();

    List<DiskImage> images = vmCheckpointDao.getDisksByCheckpointId(checkpointId);
    /* Check if the checkpoint is still there on each volume/image */
    for (DiskImage image : images) {
        VdsmImageLocationInfo locationInfo = new VdsmImageLocationInfo(
                image.getStorageIds().get(0),
                image.getId(),
                image.getImageId(),
                null);

        VolumeBitmapCommandParameters parameters =
                new VolumeBitmapCommandParameters(
                        getStoragePoolId(),
                        locationInfo,
                        null);
        parameters.setVdsId(getVdsId());
        parameters.setEndProcedure(ActionParametersBase.EndProcedure.COMMAND_MANAGED);
        parameters.setParentCommand(getActionType());
        parameters.setParentParameters(getParameters());

        ActionReturnValue returnValue = runInternalActionWithTasksContext(ActionType.ListVolumeBitmaps, parameters);
        boolean valid = false;
        if (returnValue.getSucceeded()) {
            UUIDListReturn bitmaps = returnValue.getActionReturnValue();
            // A succeeded action without a payload is treated like a missing bitmap
            // instead of being dereferenced.
            if (bitmaps != null && bitmaps.getUUIDList() != null) {
                valid = Arrays.stream(bitmaps.getUUIDList()).anyMatch(bitmapName::equals);
            }
        }
        /* ListVolumeBitmaps failed or bitmap did not exist on disk -> Remove checkpoints */
        if (!valid) {
            log.error("Checkpoint '{}' does not exist for disk '{}'. Removing checkpoints",
                    checkpointId,
                    image.getId());
            /* Some checkpoint corruption, remove checkpoints */
            DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters =
                    new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image));
            deleteAllVmCheckpointsParameters.setParentCommand(getActionType());
            deleteAllVmCheckpointsParameters.setParentParameters(getParameters());
            deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED);
            deleteAllVmCheckpointsParameters.setForce(true);

            runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters);
            return false;
        }
    }
    return true;
}

private boolean redefineVmCheckpoints() {
VmBackupParameters parameters = new VmBackupParameters(getParameters().getVmBackup());
parameters.setParentCommand(getActionType());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.ovirt.engine.core.bll.storage.disk.image;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -32,17 +33,21 @@
import org.ovirt.engine.core.common.AuditLogType;
import org.ovirt.engine.core.common.VdcObjectType;
import org.ovirt.engine.core.common.action.ActionParametersBase;
import org.ovirt.engine.core.common.action.ActionParametersBase.EndProcedure;
import org.ovirt.engine.core.common.action.ActionReturnValue;
import org.ovirt.engine.core.common.action.ActionType;
import org.ovirt.engine.core.common.action.AddDiskParameters;
import org.ovirt.engine.core.common.action.DeleteAllVmCheckpointsParameters;
import org.ovirt.engine.core.common.action.LockProperties;
import org.ovirt.engine.core.common.action.RemoveDiskParameters;
import org.ovirt.engine.core.common.action.TransferDiskImageParameters;
import org.ovirt.engine.core.common.action.TransferImageStatusParameters;
import org.ovirt.engine.core.common.action.VolumeBitmapCommandParameters;
import org.ovirt.engine.core.common.businessentities.ActionGroup;
import org.ovirt.engine.core.common.businessentities.StorageDomain;
import org.ovirt.engine.core.common.businessentities.VDS;
import org.ovirt.engine.core.common.businessentities.VM;
import org.ovirt.engine.core.common.businessentities.VdsmImageLocationInfo;
import org.ovirt.engine.core.common.businessentities.VmBackup;
import org.ovirt.engine.core.common.businessentities.VmBackupPhase;
import org.ovirt.engine.core.common.businessentities.storage.DiskBackupMode;
Expand Down Expand Up @@ -92,6 +97,7 @@
import org.ovirt.engine.core.utils.EngineLocalConfig;
import org.ovirt.engine.core.utils.ReplacementUtils;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.irsbroker.UUIDListReturn;
import org.ovirt.engine.core.vdsbroker.vdsbroker.PrepareImageReturn;

@NonTransactiveCommandAttribute
Expand Down Expand Up @@ -259,7 +265,54 @@ private PrepareImageVDSCommandParameters getPrepareParameters(Guid vdsId) {
getDiskImage().getImageId(), true);
}

/**
 * Checks that the bitmap named after the given checkpoint is still present on
 * the given disk image.
 *
 * The on-disk bitmaps are listed via the ListVolumeBitmaps internal action.
 * If the listing fails, returns no data, or does not contain the checkpoint
 * id, a forced DeleteAllVmCheckpoints is issued for the image.
 *
 * @param image
 *            the disk image whose volume bitmaps are listed
 * @param checkpointId
 *            the checkpoint to look for; may be {@code null}, in which case
 *            there is nothing to validate
 * @return {@code true} if there is nothing to validate or the bitmap exists
 *         on disk, {@code false} if it is missing and checkpoint removal was
 *         requested
 */
private boolean validateBitmap(DiskImage image, Guid checkpointId) {
    if (checkpointId == null) {
        // No checkpoint means no bitmap to look for. Without this guard a null id
        // throws a NullPointerException on checkpointId.toString(), or is treated
        // as corruption and wipes the checkpoints when the listing fails.
        return true;
    }

    VdsmImageLocationInfo locationInfo = new VdsmImageLocationInfo(
            image.getStorageIds().get(0),
            image.getId(),
            image.getImageId(),
            null);

    VolumeBitmapCommandParameters parameters =
            new VolumeBitmapCommandParameters(
                    getStoragePoolId(),
                    locationInfo,
                    null);
    parameters.setVdsId(getVdsId());
    parameters.setEndProcedure(ActionParametersBase.EndProcedure.COMMAND_MANAGED);
    parameters.setParentCommand(getActionType());
    parameters.setParentParameters(getParameters());

    ActionReturnValue returnValue = runInternalActionWithTasksContext(ActionType.ListVolumeBitmaps, parameters);
    boolean valid = false;
    if (returnValue.getSucceeded()) {
        UUIDListReturn bitmaps = returnValue.getActionReturnValue();
        // A succeeded action without a payload is treated like a missing bitmap
        // instead of being dereferenced.
        if (bitmaps != null && bitmaps.getUUIDList() != null) {
            valid = Arrays.stream(bitmaps.getUUIDList()).anyMatch(checkpointId.toString()::equals);
        }
    }
    /* ListVolumeBitmaps failed or bitmap did not exist on disk -> Remove checkpoints */
    if (!valid) {
        // Two placeholders, two arguments: the original passed checkpointId a
        // second time, which the logger silently ignored.
        log.error("Checkpoint '{}' does not exist on disk '{}'. Removing all checkpoints.",
                checkpointId,
                image.getId());
        /* Some checkpoint corruption, remove checkpoints */
        DeleteAllVmCheckpointsParameters deleteAllVmCheckpointsParameters =
                new DeleteAllVmCheckpointsParameters(getVmId(), List.of(image));
        deleteAllVmCheckpointsParameters.setParentCommand(getActionType());
        deleteAllVmCheckpointsParameters.setParentParameters(getParameters());
        deleteAllVmCheckpointsParameters.setEndProcedure(EndProcedure.COMMAND_MANAGED);
        deleteAllVmCheckpointsParameters.setForce(true);

        runInternalAction(ActionType.DeleteAllVmCheckpoints, deleteAllVmCheckpointsParameters);
        return false;
    }
    return true;
}

private Guid getBitmap() {
if (!validateBitmap(getDiskImage(), getBackup().getFromCheckpointId())) {
return null;
}

if (isHybridBackup() && getDiskImage().getBackupMode() == DiskBackupMode.Incremental) {
return getBackup().getFromCheckpointId();
}
Expand Down

0 comments on commit 7b8763f

Please sign in to comment.