From ce17a00065d179517e7bca7225621602f7d95b0b Mon Sep 17 00:00:00 2001 From: Dylan Richardson Date: Tue, 26 Nov 2024 10:38:16 -0500 Subject: [PATCH] DEVPROD-13056: fix s3 put skip_existing failures (#8510) This commit fixes s3.put's skip_existing parameter to not always fail when a remote file already exists. At the end of s3.put, we check that the length of uploadedFiles matches the length of fileList. If the lengths aren't equal, the command should fail because there was at least one upload error. skip_existing did not append to this list, but these skipped files were expected and should not be errors. To fix this, I added a new slice that tracks files we've skipped separately. The end check now considers both uploaded and skipped files to determine if there was an error. I've also included a new integration test that checks s3.put with skip_existing set to true. --- agent/command/s3_put.go | 18 +++++--- agent/command/s3_put_test.go | 89 ++++++++++++++++++++++++++++++++++++ config.go | 2 +- go.mod | 2 +- scripts/setup-credentials.sh | 4 ++ self-tests.yml | 4 ++ 6 files changed, 111 insertions(+), 8 deletions(-) diff --git a/agent/command/s3_put.go b/agent/command/s3_put.go index 205ec3d4e6d..61adc7a340e 100644 --- a/agent/command/s3_put.go +++ b/agent/command/s3_put.go @@ -346,9 +346,10 @@ func (s3pc *s3put) putWithRetry(ctx context.Context, comm client.Communicator, l backoffCounter := getS3OpBackoff() var ( - err error - uploadedFiles []string - filesList []string + err error + uploadedFiles []string + filesList []string + skippedFilesCount int ) timer := time.NewTimer(0) @@ -396,6 +397,7 @@ retryLoop: // reset to avoid duplicated uploaded references uploadedFiles = []string{} + skippedFilesCount = 0 uploadLoop: for _, fpath := range filesList { @@ -422,6 +424,8 @@ retryLoop: return errors.Wrapf(err, "checking if file '%s' exists", remoteName) } if exists { + skippedFilesCount++ + logger.Task().Infof("Not uploading file '%s' because remote file '%s' already exists. Continuing to upload other files.", fpath, remoteName) continue uploadLoop } @@ -475,9 +479,11 @@ retryLoop: logger.Task().WarningWhen(strings.Contains(s3pc.Bucket, "."), "Bucket names containing dots that are created after Sept. 30, 2020 are not guaranteed to have valid attached URLs.") - if len(uploadedFiles) != len(filesList) && !s3pc.skipMissing { - logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), len(uploadedFiles)) - return errors.Errorf("uploaded %d files of %d requested", len(uploadedFiles), len(filesList)) + processedCount := skippedFilesCount + len(uploadedFiles) + + if processedCount != len(filesList) && !s3pc.skipMissing { + logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), processedCount) + return errors.Errorf("uploaded %d files of %d requested", processedCount, len(filesList)) } return nil diff --git a/agent/command/s3_put_test.go b/agent/command/s3_put_test.go index 0f8774541f0..fd35621c0b0 100644 --- a/agent/command/s3_put_test.go +++ b/agent/command/s3_put_test.go @@ -2,18 +2,25 @@ package command import ( "context" + "fmt" + "io" "os" "path/filepath" + "strconv" "testing" + "github.com/aws/aws-sdk-go-v2/credentials" s3Types "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/evergreen-ci/evergreen/agent/internal" "github.com/evergreen-ci/evergreen/agent/internal/client" "github.com/evergreen-ci/evergreen/model" "github.com/evergreen-ci/evergreen/model/artifact" "github.com/evergreen-ci/evergreen/model/task" + "github.com/evergreen-ci/evergreen/testutil" "github.com/evergreen-ci/evergreen/util" "github.com/evergreen-ci/pail" + "github.com/evergreen-ci/utility" + "github.com/mongodb/grip/send" . "github.com/smartystreets/goconvey/convey" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -639,3 +646,85 @@ func TestPreservePath(t *testing.T) { } } + +func TestS3PutSkipExisting(t *testing.T) { + if skip, _ := strconv.ParseBool(os.Getenv("SKIP_INTEGRATION_TESTS")); skip { + t.Skip("SKIP_INTEGRATION_TESTS is set, skipping integration test") + } + + temproot := t.TempDir() + + settings := testutil.GetIntegrationFile(t) + + firstFilePath := filepath.Join(temproot, "first-file.txt") + secondFilePath := filepath.Join(temproot, "second-file.txt") + + payload := []byte("hello world") + require.NoError(t, os.WriteFile(firstFilePath, payload, 0755)) + require.NoError(t, os.WriteFile(secondFilePath, []byte("second file"), 0755)) + + accessKeyID := settings.Expansions["aws_key"] + secretAccessKey := settings.Expansions["aws_secret"] + token := settings.Expansions["aws_token"] + bucketName := settings.Expansions["bucket"] + region := "us-east-1" + + id := utility.RandomString() + + remoteFile := fmt.Sprintf("tests/%s/%s", t.Name(), id) + + cmd := s3PutFactory() + params := map[string]any{ + "aws_key": accessKeyID, + "aws_secret": secretAccessKey, + "aws_session_token": token, + "local_file": firstFilePath, + "remote_file": remoteFile, + "bucket": bucketName, + "region": region, + "skip_existing": "true", + "content_type": "text/plain", + "permissions": "private", + } + + require.NoError(t, cmd.ParseParams(params)) + + tconf := &internal.TaskConfig{ + Task: task.Task{}, + WorkDir: temproot, + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sender := send.MakeInternalLogger() + logger := client.NewSingleChannelLogHarness("test", sender) + comm := client.NewMock("") + + require.NoError(t, cmd.Execute(ctx, comm, logger, tconf)) + + params["local_file"] = secondFilePath + require.NoError(t, cmd.ParseParams(params)) + + require.NoError(t, cmd.Execute(ctx, comm, logger, tconf)) + + creds := credentials.NewStaticCredentialsProvider(accessKeyID, secretAccessKey, token) + + opts := pail.S3Options{ + Region: region, + Name: bucketName, + Credentials: creds, + } + + bucket, err := pail.NewS3Bucket(ctx, opts) + require.NoError(t, err) + + got, err := bucket.Get(ctx, remoteFile) + require.NoError(t, err) + + content, err := io.ReadAll(got) + require.NoError(t, err) + + // verify that file content wasn't overwritten by the second file + assert.Equal(t, payload, content) +} diff --git a/config.go b/config.go index 3098da8d065..f4dea4c95d6 100644 --- a/config.go +++ b/config.go @@ -37,7 +37,7 @@ var ( // Agent version to control agent rollover. The format is the calendar date // (YYYY-MM-DD). - AgentVersion = "2024-11-26" + AgentVersion = "2024-11-27" ) const ( diff --git a/go.mod b/go.mod index 5d72e2081c4..3df7a705260 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/aws/aws-sdk-go v1.55.5 // indirect github.com/aws/aws-sdk-go-v2 v1.30.3 github.com/aws/aws-sdk-go-v2/config v1.27.27 - github.com/aws/aws-sdk-go-v2/credentials v1.17.27 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.27 github.com/aws/aws-sdk-go-v2/service/ec2 v1.168.0 github.com/aws/aws-sdk-go-v2/service/ecs v1.44.1 github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.23.1 diff --git a/scripts/setup-credentials.sh b/scripts/setup-credentials.sh index b8ae59e0337..5ed41db7060 100755 --- a/scripts/setup-credentials.sh +++ b/scripts/setup-credentials.sh @@ -74,6 +74,10 @@ github_pr_creator_org: "10gen" expansions: papertrail_key_id: $PAPERTRAIL_KEY_ID papertrail_secret_key: $PAPERTRAIL_SECRET_KEY + aws_key: $AWS_ACCESS_KEY_ID + aws_secret: $AWS_SECRET_ACCESS_KEY + aws_token: $AWS_SESSION_TOKEN + bucket: evergreen-integration-testing # Do not edit below this line github_app_key: | EOF diff --git a/self-tests.yml b/self-tests.yml index dab66c40f14..a299e272545 100644 --- a/self-tests.yml +++ b/self-tests.yml @@ -290,6 +290,10 @@ functions: PAPERTRAIL_SECRET_KEY: ${papertrail_secret_key} RUNTIME_ENVIRONMENTS_BASE_URL: ${staging_runtime_environments_base_url} RUNTIME_ENVIRONMENTS_API_KEY: ${staging_runtime_environments_api_key} + AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID} + AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY} + AWS_SESSION_TOKEN: ${AWS_SESSION_TOKEN} + command: bash scripts/setup-credentials.sh setup-mongodb: