diff --git a/tests/suites/controller/enable_ha.sh b/tests/suites/controller/enable_ha.sh index 4a45e0ea2bb..d2972299ff0 100644 --- a/tests/suites/controller/enable_ha.sh +++ b/tests/suites/controller/enable_ha.sh @@ -25,51 +25,6 @@ wait_for_controller_machines() { fi } -wait_for_controller_machines_tear_down() { - amount=${1} - - attempt=0 - # shellcheck disable=SC2143 - until [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "started") | .current' | wc -l | grep "${amount}")" ]]; do - echo "[+] (attempt ${attempt}) polling started machines during ha tear down" - juju machines -m controller 2>&1 | sed 's/^/ | /g' || true - sleep "${SHORT_TIMEOUT}" - attempt=$((attempt + 1)) - - if [[ ${attempt} -gt 25 ]]; then - echo "enable-ha failed waiting for only 1 started machine" - exit 1 - fi - done - - attempt=0 - # shellcheck disable=SC2143 - until [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "stopped") | .current' | wc -l | grep 0)" ]]; do - echo "[+] (attempt ${attempt}) polling stopped machines during ha tear down" - juju machines -m controller 2>&1 | sed 's/^/ | /g' || true - sleep "${SHORT_TIMEOUT}" - attempt=$((attempt + 1)) - - if [[ ${attempt} -gt 25 ]]; then - echo "enable-ha failed waiting for machines to tear down" - exit 1 - fi - done - - if [[ "$(juju machines -m controller --format=json | jq -r '.machines | .[] | .["juju-status"] | select(.current == "error") | .current' | wc -l)" -gt 0 ]]; then - echo "machine in controller model with error during ha tear down" - juju machines -m controller 2>&1 | sed 's/^/ | /g' || true - exit 1 - fi - - if [[ ${attempt} -gt 0 ]]; then - echo "[+] $(green 'Completed polling machines')" - juju machines -m controller 2>&1 | sed 's/^/ | /g' - - sleep "${SHORT_TIMEOUT}" - fi -} - wait_for_ha() { amount=${1} @@ -97,6 +52,16 @@ wait_for_ha() { fi } +wait_for_controller_no_leader() { + # We need to wait for the Dqlite cluster to be broken (loss of quorum), + # before we start waiting for the backstop behaviour to be pending + # (see wait_for_controller_leader below). + # shellcheck disable=SC2143 + until ! [[ "$(juju exec -m controller --unit controller/leader uptime | grep load)" ]]; do + echo "[+] waiting for no controller leadership" + done +} + wait_for_controller_leader() { # Since the institution of Dqlite for leases, we need to wait until the # backstop workflow has run before we are functional with a single @@ -139,13 +104,12 @@ run_enable_ha() { juju remove-machine -m controller 1 juju remove-machine -m controller 2 - wait_for_controller_machines_tear_down 1 + wait_for_controller_no_leader + wait_for_controller_leader # Ensure that we have no ha enabled machines. juju show-controller --format=json | jq -r '.[] | .["controller-machines"] | reduce(.[] | select(.["instance-id"] == null)) as $i (0;.+=1)' | grep 0 - wait_for_controller_leader - destroy_model "enable-ha" }