Skip to content

Commit

Permalink
Merge pull request #102 from buildkite-plugins/plt-2121/disable-error…
Browse files Browse the repository at this point in the history
…-trap-for-retries

Disable error traps on retries in environment hook
  • Loading branch information
lucaswilric authored Mar 6, 2024
2 parents dac8ffd + ac77ced commit ef92d80
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 2 deletions.
41 changes: 40 additions & 1 deletion hooks/environment
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
#!/usr/bin/env bash
set -eu -o pipefail

# Some callers slightly abuse this script by including it via Bash's `source` command, instead of calling it as an executable. This means this code is sometimes executed with extra Bash context (namely options and error traps) that change how the code is interpreted. This interference is not always desirable, but unfortunately we must tolerate usage via `source`.
#
# First we detect whether the `-E` option is set. This matters because when `-E` is combined with an error trap (as in the agent environment hook), it breaks this hook's retry functionality.
#
# The special $- variable is a list of all the options currently set. We find out if it contains 'E', and store the result in WAS_BIG_E_OPTION_SET_AT_START.
#
# We'll use WAS_BIG_E_OPTION_SET_AT_START later on, to decide if we need to remove and reinstate it around retries.
#
WAS_BIG_E_OPTION_SET_AT_START=no
if [[ "$-" =~ "E" ]]; then
WAS_BIG_E_OPTION_SET_AT_START=yes
fi

# Reads either a value or a list from plugin config
function plugin_read_list() {
local prefix="BUILDKITE_PLUGIN_ECR_$1"
Expand Down Expand Up @@ -47,8 +60,34 @@ function version_a_gte_b() {
return 0 # major same, minor same, patch same or more
}

# Retries a command on failure.
# Bash's -E option propagates error traps into called functions. In the case of the `_retry` function, we don't want to do that because if we interrupt execution on an error, it becomes impossible to retry.
#
# We need to do this in a wrapper because by the time we are executing _retry, it's too late to unset the error trap.
#
function retry() {
local -r -i retries="$1"; shift
local exit_code
local stdin_value

cmd=(_retry "$retries")
if [[ "$1" == "--with-stdin" ]]; then
cmd+=(--with-stdin)
read -sr stdin_value
shift
fi

[[ "$WAS_BIG_E_OPTION_SET_AT_START" == "yes" ]] && set +E

"${cmd[@]}" "$@" <<< "${stdin_value:-}"
exit_code=$?

[[ "$WAS_BIG_E_OPTION_SET_AT_START" == "yes" ]] && set -E

return $exit_code
}

# Retries a command on failure.
function _retry() {
local -r -i retries="$1"; shift
local -i max_attempts=$((retries + 1))
local -i attempt_num=1
Expand Down
12 changes: 12 additions & 0 deletions tests/jigs/trap-and-source-env-hook.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

set -Eeuo pipefail

handle_err() {
echo "TRAP TRIGGERED" >&2
exit 53
}

trap handle_err ERR

source hooks/environment
56 changes: 55 additions & 1 deletion tests/run.bats
Original file line number Diff line number Diff line change
Expand Up @@ -547,4 +547,58 @@ load "${BATS_PLUGIN_PATH}/load.bash"
unstub aws
unstub docker
rm /tmp/password-stdin
}
}

@test "Set error trap; source env hook; ECR login; discovered account ID, with error, and then retry until success" {
[[ -z $SKIP_SLOW ]] || skip "skipping slow test"
export BUILDKITE_PLUGIN_ECR_LOGIN=true
export BUILDKITE_PLUGIN_ECR_RETRIES=1
export AWS_DEFAULT_REGION=us-east-1

stub aws \
"--version : echo aws-cli/2.0.0 Python/3.8.1 Linux/5.5.6-arch1-1 botocore/1.15.3" \
"sts get-caller-identity --query Account --output text : echo 888888888888" \
"--region us-east-1 ecr get-login-password : exit 1" \
"--region us-east-1 ecr get-login-password : echo hunter2"

stub docker \
"login --username AWS --password-stdin 888888888888.dkr.ecr.us-east-1.amazonaws.com : cat > /tmp/password-stdin ; echo logging in to docker"

# I don't know whether error trapping is supported in Bats (it's not mentioned in the docs), or how control flow would work after a trap was triggered (e.g. making assertions, cleanup). So we're shoving it all in a script to encapsulate it.
run ${PWD}/tests/jigs/trap-and-source-env-hook.sh

assert_success

refute_output --partial "TRAP TRIGGERED"
assert_output --partial "Login failed on attempt 1 of 2. Trying again in 1 seconds.."
assert_output --partial "logging in to docker"

assert_equal "hunter2" "$(cat /tmp/password-stdin)"

unstub aws
unstub docker
rm /tmp/password-stdin
}

@test "Set error trap; source env hook; ECR login; discovered account ID, with error, and then retry until failure" {
[[ -z $SKIP_SLOW ]] || skip "skipping slow test"
export BUILDKITE_PLUGIN_ECR_LOGIN=true
export BUILDKITE_PLUGIN_ECR_RETRIES=1
export AWS_DEFAULT_REGION=us-east-1

stub aws \
"--version : echo aws-cli/2.0.0 Python/3.8.1 Linux/5.5.6-arch1-1 botocore/1.15.3" \
"sts get-caller-identity --query Account --output text : echo 888888888888" \
"--region us-east-1 ecr get-login-password : exit 1" \
"--region us-east-1 ecr get-login-password : exit 1"

run $PWD/tests/jigs/trap-and-source-env-hook.sh

assert_failure

assert_output --partial "Login failed on attempt 1 of 2. Trying again in 1 seconds.."
assert_output --partial "Login failed after 2 attempts"
assert_output --partial "TRAP TRIGGERED"

unstub aws
}

0 comments on commit ef92d80

Please sign in to comment.