Skip to content

Commit

Permalink
DEVPROD-13056: fix s3 put skip_existing failures
Browse files Browse the repository at this point in the history
This commit fixes s3.put's skip_existing parameter to not always fail
when a remote file already exists. At the end of s3.put, we check that
the length of uploadedFiles matches the length of filesList. If the
lengths aren't equal, the command should fail because there was at least
one upload error. skip_existing did not append to this list, but these
skipped files were expected and should not be errors.

To fix this, I added a new slice that tracks files we've skipped
separately. The end check now considers both uploaded and skipped files
to determine if there was an error.

I've also included a new integration test that checks s3.put with
skip_existing set to true.
  • Loading branch information
drichmdb committed Nov 25, 2024
1 parent de1d7e2 commit df22233
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 17 deletions.
30 changes: 15 additions & 15 deletions agent/command/s3_put.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,10 @@ func (s3pc *s3put) putWithRetry(ctx context.Context, comm client.Communicator, l
backoffCounter := getS3OpBackoff()

var (
err error
uploadedFiles []string
filesList []string
err error
uploadedFiles []string
filesList []string
skippedFilesCount int
)

timer := time.NewTimer(0)
Expand Down Expand Up @@ -379,23 +380,18 @@ retryLoop:
}
filesList, err = b.Build()
if err != nil {
// Skip erroring since local files include filter should treat files as optional.
if strings.Contains(err.Error(), utility.WalkThroughError) {
logger.Task().Warningf("Error while building file list: %s", err.Error())
return nil
} else {
return errors.Wrapf(err, "processing local files include filter '%s'",
strings.Join(s3pc.LocalFilesIncludeFilter, " "))
}
return errors.Wrapf(err, "processing local files include filter '%s'",
strings.Join(s3pc.LocalFilesIncludeFilter, " "))
}
if len(filesList) == 0 {
logger.Task().Warningf("File filter '%s' matched no files.", strings.Join(s3pc.LocalFilesIncludeFilter, " "))
logger.Task().Infof("File filter '%s' matched no files.", strings.Join(s3pc.LocalFilesIncludeFilter, " "))
return nil
}
}

// reset to avoid duplicated uploaded references
uploadedFiles = []string{}
skippedFilesCount = 0

uploadLoop:
for _, fpath := range filesList {
Expand All @@ -422,6 +418,8 @@ retryLoop:
return errors.Wrapf(err, "checking if file '%s' exists", remoteName)
}
if exists {
skippedFilesCount++

logger.Task().Infof("Not uploading file '%s' because remote file '%s' already exists. Continuing to upload other files.", fpath, remoteName)
continue uploadLoop
}
Expand Down Expand Up @@ -475,9 +473,11 @@ retryLoop:

logger.Task().WarningWhen(strings.Contains(s3pc.Bucket, "."), "Bucket names containing dots that are created after Sept. 30, 2020 are not guaranteed to have valid attached URLs.")

if len(uploadedFiles) != len(filesList) && !s3pc.skipMissing {
logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), len(uploadedFiles))
return errors.Errorf("uploaded %d files of %d requested", len(uploadedFiles), len(filesList))
processedCount := skippedFilesCount + len(uploadedFiles)

if processedCount != len(filesList) && !s3pc.skipMissing {
logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), processedCount)
return errors.Errorf("uploaded %d files of %d requested", processedCount, len(filesList))
}

return nil
Expand Down
89 changes: 89 additions & 0 deletions agent/command/s3_put_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,25 @@ package command

import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/aws/aws-sdk-go-v2/credentials"
s3Types "github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/evergreen-ci/evergreen/agent/internal"
"github.com/evergreen-ci/evergreen/agent/internal/client"
"github.com/evergreen-ci/evergreen/model"
"github.com/evergreen-ci/evergreen/model/artifact"
"github.com/evergreen-ci/evergreen/model/task"
"github.com/evergreen-ci/evergreen/testutil"
"github.com/evergreen-ci/evergreen/util"
"github.com/evergreen-ci/pail"
"github.com/evergreen-ci/utility"
"github.com/mongodb/grip/send"
. "github.com/smartystreets/goconvey/convey"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -639,3 +646,85 @@ func TestPreservePath(t *testing.T) {
}

}

// TestS3PutSkipExisting is an integration test verifying that s3.put with
// skip_existing=true treats an already-present remote object as a no-op:
// a second upload to the same remote path must neither fail the command
// nor overwrite the object's original content.
func TestS3PutSkipExisting(t *testing.T) {
	if skip, _ := strconv.ParseBool(os.Getenv("SKIP_INTEGRATION_TESTS")); skip {
		t.Skip("SKIP_INTEGRATION_TESTS is set, skipping integration test")
	}

	settings := testutil.GetIntegrationFile(t)
	workDir := t.TempDir()

	originalPath := filepath.Join(workDir, "first-file.txt")
	conflictingPath := filepath.Join(workDir, "second-file.txt")

	originalContent := []byte("hello world")
	require.NoError(t, os.WriteFile(originalPath, originalContent, 0755))
	require.NoError(t, os.WriteFile(conflictingPath, []byte("second file"), 0755))

	var (
		accessKeyID     = settings.Expansions["aws_key"]
		secretAccessKey = settings.Expansions["aws_secret"]
		token           = settings.Expansions["aws_token"]
		bucketName      = settings.Expansions["bucket"]
	)
	region := "us-east-1"

	// Randomize the remote key so concurrent/repeated runs don't collide.
	remoteFile := fmt.Sprintf("tests/%s/%s", t.Name(), utility.RandomString())

	params := map[string]any{
		"aws_key":           accessKeyID,
		"aws_secret":        secretAccessKey,
		"aws_session_token": token,
		"local_file":        originalPath,
		"remote_file":       remoteFile,
		"bucket":            bucketName,
		"region":            region,
		"skip_existing":     "true",
		"content_type":      "text/plain",
		"permissions":       "private",
	}

	cmd := s3PutFactory()
	require.NoError(t, cmd.ParseParams(params))

	tconf := &internal.TaskConfig{
		Task:    task.Task{},
		WorkDir: workDir,
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	sender := send.MakeInternalLogger()
	logger := client.NewSingleChannelLogHarness("test", sender)
	comm := client.NewMock("")

	// First execution uploads first-file.txt to the remote path.
	require.NoError(t, cmd.Execute(ctx, comm, logger, tconf))

	// Second execution points at different local content but the same remote
	// path; skip_existing should skip the upload without returning an error.
	params["local_file"] = conflictingPath
	require.NoError(t, cmd.ParseParams(params))
	require.NoError(t, cmd.Execute(ctx, comm, logger, tconf))

	// Fetch the object directly from S3 and confirm the first upload's bytes
	// were not clobbered by the second execution.
	creds := credentials.NewStaticCredentialsProvider(accessKeyID, secretAccessKey, token)
	bucket, err := pail.NewS3Bucket(ctx, pail.S3Options{
		Region:      region,
		Name:        bucketName,
		Credentials: creds,
	})
	require.NoError(t, err)

	reader, err := bucket.Get(ctx, remoteFile)
	require.NoError(t, err)

	content, err := io.ReadAll(reader)
	require.NoError(t, err)

	assert.Equal(t, originalContent, content)
}
2 changes: 1 addition & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ var (

// Agent version to control agent rollover. The format is the calendar date
// (YYYY-MM-DD).
AgentVersion = "2024-11-26"
AgentVersion = "2024-11-26-A"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/aws/aws-sdk-go-v2 v1.30.3
github.com/aws/aws-sdk-go-v2/config v1.27.27
github.com/aws/aws-sdk-go-v2/credentials v1.17.27 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.27
github.com/aws/aws-sdk-go-v2/service/ec2 v1.168.0
github.com/aws/aws-sdk-go-v2/service/ecs v1.44.1
github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.23.1
Expand Down
4 changes: 4 additions & 0 deletions scripts/setup-credentials.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ github_pr_creator_org: "10gen"
expansions:
papertrail_key_id: $PAPERTRAIL_KEY_ID
papertrail_secret_key: $PAPERTRAIL_SECRET_KEY
aws_key: $AWS_ACCESS_KEY_ID
aws_secret: $AWS_SECRET_ACCESS_KEY
aws_token: $AWS_SESSION_TOKEN
bucket: evergreen-integration-testing
# Do not edit below this line
github_app_key: |
EOF
Expand Down
4 changes: 4 additions & 0 deletions self-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ functions:
PAPERTRAIL_SECRET_KEY: ${papertrail_secret_key}
RUNTIME_ENVIRONMENTS_BASE_URL: ${staging_runtime_environments_base_url}
RUNTIME_ENVIRONMENTS_API_KEY: ${staging_runtime_environments_api_key}
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
AWS_SESSION_TOKEN: ${AWS_SESSION_TOKEN}

command: bash scripts/setup-credentials.sh

setup-mongodb:
Expand Down

0 comments on commit df22233

Please sign in to comment.