diff --git a/pkg/pillar/hypervisor/kvm.go b/pkg/pillar/hypervisor/kvm.go index 8c4483fd15..9bf8fca9b2 100644 --- a/pkg/pillar/hypervisor/kvm.go +++ b/pkg/pillar/hypervisor/kvm.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2023 Zededa, Inc. +// Copyright (c) 2017-2024 Zededa, Inc. // SPDX-License-Identifier: Apache-2.0 package hypervisor @@ -1163,26 +1163,26 @@ func waitForQmp(domainName string, available bool) error { delay := time.Second var waited time.Duration var err error + sock := GetQmpExecutorSocket(domainName) for { + if waited > maxDelay { + // Give up + logrus.Warnf("waitForQmp for %s %t: giving up", domainName, available) + if available { + return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v", sock, domainName, err) + } + return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available", domainName, sock) + } logrus.Infof("waitForQmp for %s %t: waiting for %v", domainName, available, delay) if delay != 0 { time.Sleep(delay) waited += delay } - sock := GetQmpExecutorSocket(domainName) if _, err = getQemuStatus(sock); available == (err == nil) { logrus.Infof("waitForQmp for %s %t done", domainName, available) return nil } - if waited > maxDelay { - // Give up - logrus.Warnf("waitForQmp for %s %t: giving up", domainName, available) - if available { - return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v", sock, domainName, err) - } - return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available", domainName, sock) - } delay = 2 * delay if delay > time.Minute { delay = time.Minute diff --git a/pkg/pillar/hypervisor/qmp.go b/pkg/pillar/hypervisor/qmp.go index 4f3f5d223e..843e3fa3b6 100644 --- a/pkg/pillar/hypervisor/qmp.go +++ b/pkg/pillar/hypervisor/qmp.go @@ -1,3 +1,6 @@ +// Copyright (c) 2020-2024 Zededa, Inc. +// SPDX-License-Identifier: Apache-2.0 + package hypervisor import ( @@ -15,10 +18,14 @@ import ( // this package implements subset of // https://qemu.weilnetz.de/doc/qemu-qmp-ref.html -const sockTimeout = 10 * time.Second +const ( + sockTimeout = 10 * time.Second + qmpRetries = 5 + qmpRetrySleep = 3 * time.Second +) func execRawCmd(socket, cmd string) ([]byte, error) { - var retry = 3 + var retry = qmpRetries logrus.Debugf("executing QMP command: %s", cmd) var err error var monitor *qmp.SocketMonitor @@ -28,7 +35,7 @@ func execRawCmd(socket, cmd string) ([]byte, error) { break } retry = retry - 1 - time.Sleep(time.Second) + time.Sleep(qmpRetrySleep) } if err != nil { @@ -121,13 +128,13 @@ func getQemuStatus(socket string) (types.SwState, error) { // the status is unexpected, EVE stops QEMU and game over. var errs error state := types.UNKNOWN - for attempt := 1; attempt <= 3; attempt++ { + for attempt := 1; attempt <= qmpRetries; attempt++ { raw, err := execRawCmd(socket, `{ "execute": "query-status" }`) if err != nil { err = fmt.Errorf("[attempt %d] qmp status failed for QMP socket '%s': err: '%v'; (JSON response: '%s')", attempt, socket, err, raw) errs = joinErrors(errs, err) - time.Sleep(time.Second) + time.Sleep(qmpRetrySleep) continue } @@ -146,7 +153,7 @@ func getQemuStatus(socket string) (types.SwState, error) { err = fmt.Errorf("[attempt %d] failed to parse QMP status response for QMP socket '%s': err: '%v'; (JSON response: '%s')", attempt, socket, err, raw) errs = joinErrors(errs, err) - time.Sleep(time.Second) + time.Sleep(qmpRetrySleep) continue } var matched bool @@ -154,7 +161,7 @@ func getQemuStatus(socket string) (types.SwState, error) { err = fmt.Errorf("[attempt %d] unknown QMP status '%s' for QMP socket '%s'; (JSON response: '%s')", attempt, result.Return.Status, socket, raw) errs = joinErrors(errs, err) - time.Sleep(time.Second) + time.Sleep(qmpRetrySleep) continue }