From e068289c7a8beef3d1950d17d3a421d50d465702 Mon Sep 17 00:00:00 2001 From: Nikolay Martyanov Date: Tue, 28 May 2024 16:10:17 +0200 Subject: [PATCH] pillar: Release CPUs on domain activation failure. This commit addresses an issue where CPUs assigned to a domain within doActivate() are not released if the domain activation fails. The new logic ensures that CPUs are properly released and the CPU mask in the status is updated accordingly. This is achieved by introducing the releaseCPUs function and calling it in the appropriate error handling blocks within doActivate(). It is common for doActivate to fail in scenarios such as switching application profiles that share the same adapter. In such cases, the second application will fail to activate until the first one releases the necessary adapter. Signed-off-by: Nikolay Martyanov (cherry picked from commit 10c924ee953da4a5f1233bd22acf0bec02e85566) --- pkg/pillar/cmd/domainmgr/domainmgr.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pkg/pillar/cmd/domainmgr/domainmgr.go b/pkg/pillar/cmd/domainmgr/domainmgr.go index 777d57da54..7ffc0bea67 100644 --- a/pkg/pillar/cmd/domainmgr/domainmgr.go +++ b/pkg/pillar/cmd/domainmgr/domainmgr.go @@ -1261,6 +1261,9 @@ func updateNonPinnedCPUs(ctx *domainContext, config *types.DomainConfig, status return nil } +// assignCPUs assigns CPUs to the VM based on the configuration +// By the assignment, we mean that the CPUs are assigned in the CPUAllocator context to the given VM +// and the cpumask is updated in the *status* func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) error { if config.VmConfig.CPUsPinned { // Pin the CPU cpusToAssign, err := ctx.cpuAllocator.Allocate(config.UUIDandVersion.UUID, config.VCpus) @@ -1276,6 +1279,17 @@ func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.Do return nil } +// releaseCPUs releases the CPUs that were previously assigned to the VM. +// The cpumask in the *status* is updated accordingly, and the CPUs are released in the CPUAllocator context. +func releaseCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) { + if ctx.cpuPinningSupported && config.VmConfig.CPUsPinned && status.VmConfig.CPUs != "" { + if err := ctx.cpuAllocator.Free(config.UUIDandVersion.UUID); err != nil { + log.Errorf("Failed to free CPUs for %s: %s", config.DisplayName, err) + } + } + status.VmConfig.CPUs = "" +} + func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) { log.Functionf("handleCreate(%v) for %s", @@ -1461,6 +1475,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, status.PendingAdd = false status.SetErrorDescription(*errDescription) status.AdaptersFailed = true + releaseCPUs(ctx, &config, status) publishDomainStatus(ctx, status) releaseAdapters(ctx, config.IoAdapterList, config.UUIDandVersion.UUID, nil) @@ -1484,6 +1499,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, status.PendingAdd = false status.SetErrorNow(err.Error()) status.AdaptersFailed = true + releaseCPUs(ctx, &config, status) publishDomainStatus(ctx, status) releaseAdapters(ctx, config.IoAdapterList, config.UUIDandVersion.UUID, nil) @@ -1503,6 +1519,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, snapshotID, config.UUIDandVersion.UUID, err) log.Error(err.Error()) status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) return } @@ -1531,6 +1548,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, err := fmt.Errorf("doActivate: Failed to write cloud-init metadata file. Error %s", err) log.Error(err.Error()) status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) return } @@ -1541,6 +1559,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, err := fmt.Errorf("doActivate: Failed to apply cloud-init config. Error %s", err) log.Error(err.Error()) status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) return } } @@ -1556,6 +1575,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, if err != nil { log.Errorf("Failed to check disk format: %v", err.Error()) status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) return } } @@ -1573,6 +1593,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, log.Errorf("Failed to create DomainStatus from %v: %s", config, err) status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) return } @@ -1592,6 +1613,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, log.Errorf("DomainCreate for %s: %s", status.DomainName, err) status.BootFailed = true status.SetErrorNow(err.Error()) + releaseCPUs(ctx, &config, status) publishDomainStatus(ctx, status) return }