Skip to content

Commit

Permalink
Wait for 15 seconds for the qmp listener
Browse files Browse the repository at this point in the history
Make the code consistently wait/retry for 15 seconds - in some cases it
would only wait for 3 seconds. Fixes timeout on slow devices.

Signed-off-by: eriknordmark <[email protected]>
  • Loading branch information
eriknordmark committed Oct 9, 2024
1 parent 4357ccb commit c44d2b4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
20 changes: 10 additions & 10 deletions pkg/pillar/hypervisor/kvm.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2023 Zededa, Inc.
// Copyright (c) 2017-2024 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package hypervisor
Expand Down Expand Up @@ -1163,26 +1163,26 @@ func waitForQmp(domainName string, available bool) error {
delay := time.Second
var waited time.Duration
var err error
sock := GetQmpExecutorSocket(domainName)

for {
if waited > maxDelay {
// Give up
logrus.Warnf("waitForQmp for %s %t: giving up", domainName, available)
if available {
return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v", sock, domainName, err)
}
return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available", domainName, sock)
}
logrus.Infof("waitForQmp for %s %t: waiting for %v", domainName, available, delay)
if delay != 0 {
time.Sleep(delay)
waited += delay
}
sock := GetQmpExecutorSocket(domainName)
if _, err = getQemuStatus(sock); available == (err == nil) {
logrus.Infof("waitForQmp for %s %t done", domainName, available)
return nil
}
if waited > maxDelay {
// Give up
logrus.Warnf("waitForQmp for %s %t: giving up", domainName, available)
if available {
return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v", sock, domainName, err)
}
return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available", domainName, sock)
}
delay = 2 * delay
if delay > time.Minute {
delay = time.Minute
Expand Down
21 changes: 14 additions & 7 deletions pkg/pillar/hypervisor/qmp.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Copyright (c) 2020-2024 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package hypervisor

import (
Expand All @@ -15,10 +18,14 @@ import (
// this package implements subset of
// https://qemu.weilnetz.de/doc/qemu-qmp-ref.html

const sockTimeout = 10 * time.Second
// Timing and retry parameters shared by the QMP helpers in this file.
const (
// sockTimeout is a 10-second timeout; its use is not visible in this
// view (presumably applied when connecting to the QMP socket - confirm).
sockTimeout = 10 * time.Second
// qmpRetries is the number of attempts made by execRawCmd and
// getQemuStatus before they stop retrying.
qmpRetries = 5
// qmpRetrySleep is the pause between consecutive attempts; together with
// qmpRetries it gives roughly 15 seconds of retrying per helper.
qmpRetrySleep = 3 * time.Second
)

func execRawCmd(socket, cmd string) ([]byte, error) {
var retry = 3
var retry = qmpRetries
logrus.Debugf("executing QMP command: %s", cmd)
var err error
var monitor *qmp.SocketMonitor
Expand All @@ -28,7 +35,7 @@ func execRawCmd(socket, cmd string) ([]byte, error) {
break
}
retry = retry - 1
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
}

if err != nil {
Expand Down Expand Up @@ -121,13 +128,13 @@ func getQemuStatus(socket string) (types.SwState, error) {
// the status is unexpected, EVE stops QEMU and game over.
var errs error
state := types.UNKNOWN
for attempt := 1; attempt <= 3; attempt++ {
for attempt := 1; attempt <= qmpRetries; attempt++ {
raw, err := execRawCmd(socket, `{ "execute": "query-status" }`)
if err != nil {
err = fmt.Errorf("[attempt %d] qmp status failed for QMP socket '%s': err: '%v'; (JSON response: '%s')",
attempt, socket, err, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}

Expand All @@ -146,15 +153,15 @@ func getQemuStatus(socket string) (types.SwState, error) {
err = fmt.Errorf("[attempt %d] failed to parse QMP status response for QMP socket '%s': err: '%v'; (JSON response: '%s')",
attempt, socket, err, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}
var matched bool
if state, matched = qmpStatusMap[result.Return.Status]; !matched {
err = fmt.Errorf("[attempt %d] unknown QMP status '%s' for QMP socket '%s'; (JSON response: '%s')",
attempt, result.Return.Status, socket, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}

Expand Down

0 comments on commit c44d2b4

Please sign in to comment.