Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEVPROD-13056: fix s3 put skip_existing failures #8510

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions agent/command/s3_put.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,10 @@ func (s3pc *s3put) putWithRetry(ctx context.Context, comm client.Communicator, l
backoffCounter := getS3OpBackoff()

var (
err error
uploadedFiles []string
filesList []string
err error
uploadedFiles []string
filesList []string
skippedFilesCount int
)

timer := time.NewTimer(0)
Expand Down Expand Up @@ -396,6 +397,7 @@ retryLoop:

// reset to avoid duplicated uploaded references
uploadedFiles = []string{}
skippedFilesCount = 0

uploadLoop:
for _, fpath := range filesList {
Expand All @@ -422,6 +424,8 @@ retryLoop:
return errors.Wrapf(err, "checking if file '%s' exists", remoteName)
}
if exists {
skippedFilesCount++

logger.Task().Infof("Not uploading file '%s' because remote file '%s' already exists. Continuing to upload other files.", fpath, remoteName)
continue uploadLoop
}
Expand Down Expand Up @@ -475,9 +479,11 @@ retryLoop:

logger.Task().WarningWhen(strings.Contains(s3pc.Bucket, "."), "Bucket names containing dots that are created after Sept. 30, 2020 are not guaranteed to have valid attached URLs.")

if len(uploadedFiles) != len(filesList) && !s3pc.skipMissing {
logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), len(uploadedFiles))
return errors.Errorf("uploaded %d files of %d requested", len(uploadedFiles), len(filesList))
processedCount := skippedFilesCount + len(uploadedFiles)

if processedCount != len(filesList) && !s3pc.skipMissing {
logger.Task().Infof("Attempted to upload %d files, %d successfully uploaded.", len(filesList), processedCount)
return errors.Errorf("uploaded %d files of %d requested", processedCount, len(filesList))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this edit is wrong -- it should still be using length of uploaded files

}

return nil
Expand Down
89 changes: 89 additions & 0 deletions agent/command/s3_put_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,25 @@ package command

import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/aws/aws-sdk-go-v2/credentials"
s3Types "github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/evergreen-ci/evergreen/agent/internal"
"github.com/evergreen-ci/evergreen/agent/internal/client"
"github.com/evergreen-ci/evergreen/model"
"github.com/evergreen-ci/evergreen/model/artifact"
"github.com/evergreen-ci/evergreen/model/task"
"github.com/evergreen-ci/evergreen/testutil"
"github.com/evergreen-ci/evergreen/util"
"github.com/evergreen-ci/pail"
"github.com/evergreen-ci/utility"
"github.com/mongodb/grip/send"
. "github.com/smartystreets/goconvey/convey"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -639,3 +646,85 @@ func TestPreservePath(t *testing.T) {
}

}

func TestS3PutSkipExisting(t *testing.T) {
if skip, _ := strconv.ParseBool(os.Getenv("SKIP_INTEGRATION_TESTS")); skip {
t.Skip("SKIP_INTEGRATION_TESTS is set, skipping integration test")
}

temproot := t.TempDir()

settings := testutil.GetIntegrationFile(t)

firstFilePath := filepath.Join(temproot, "first-file.txt")
secondFilePath := filepath.Join(temproot, "second-file.txt")

payload := []byte("hello world")
require.NoError(t, os.WriteFile(firstFilePath, payload, 0755))
require.NoError(t, os.WriteFile(secondFilePath, []byte("second file"), 0755))

accessKeyID := settings.Expansions["aws_key"]
secretAccessKey := settings.Expansions["aws_secret"]
token := settings.Expansions["aws_token"]
bucketName := settings.Expansions["bucket"]
region := "us-east-1"

id := utility.RandomString()

remoteFile := fmt.Sprintf("tests/%s/%s", t.Name(), id)

cmd := s3PutFactory()
params := map[string]any{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤯 didn't know about any

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's newish!

"aws_key": accessKeyID,
"aws_secret": secretAccessKey,
"aws_session_token": token,
"local_file": firstFilePath,
"remote_file": remoteFile,
"bucket": bucketName,
"region": region,
"skip_existing": "true",
"content_type": "text/plain",
"permissions": "private",
}

require.NoError(t, cmd.ParseParams(params))

tconf := &internal.TaskConfig{
Task: task.Task{},
WorkDir: temproot,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

sender := send.MakeInternalLogger()
logger := client.NewSingleChannelLogHarness("test", sender)
comm := client.NewMock("")

require.NoError(t, cmd.Execute(ctx, comm, logger, tconf))

params["local_file"] = secondFilePath
require.NoError(t, cmd.ParseParams(params))

require.NoError(t, cmd.Execute(ctx, comm, logger, tconf))

creds := credentials.NewStaticCredentialsProvider(accessKeyID, secretAccessKey, token)

opts := pail.S3Options{
Region: region,
Name: bucketName,
Credentials: creds,
}

bucket, err := pail.NewS3Bucket(ctx, opts)
require.NoError(t, err)

got, err := bucket.Get(ctx, remoteFile)
require.NoError(t, err)

content, err := io.ReadAll(got)
require.NoError(t, err)

// verify that file content wasn't overwritten by the second file
assert.Equal(t, payload, content)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a little comment here, like "verify that file content wasn't overwritten by the second file"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done!

}
2 changes: 1 addition & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ var (

// Agent version to control agent rollover. The format is the calendar date
// (YYYY-MM-DD).
AgentVersion = "2024-11-26"
AgentVersion = "2024-11-27"
)

const (
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/aws/aws-sdk-go-v2 v1.30.3
github.com/aws/aws-sdk-go-v2/config v1.27.27
github.com/aws/aws-sdk-go-v2/credentials v1.17.27 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.27
github.com/aws/aws-sdk-go-v2/service/ec2 v1.168.0
github.com/aws/aws-sdk-go-v2/service/ecs v1.44.1
github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi v1.23.1
Expand Down
4 changes: 4 additions & 0 deletions scripts/setup-credentials.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ github_pr_creator_org: "10gen"
expansions:
papertrail_key_id: $PAPERTRAIL_KEY_ID
papertrail_secret_key: $PAPERTRAIL_SECRET_KEY
aws_key: $AWS_ACCESS_KEY_ID
aws_secret: $AWS_SECRET_ACCESS_KEY
Comment on lines +77 to +78
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these meant to be different than the ones used on line 54 and 55? I don't have time for a full review so I may be missing something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure where those are coming from honestly -- I'm using the credentials we get from assume role at the beginning of the task!

aws_token: $AWS_SESSION_TOKEN
bucket: evergreen-integration-testing
# Do not edit below this line
github_app_key: |
EOF
Expand Down
4 changes: 4 additions & 0 deletions self-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ functions:
PAPERTRAIL_SECRET_KEY: ${papertrail_secret_key}
RUNTIME_ENVIRONMENTS_BASE_URL: ${staging_runtime_environments_base_url}
RUNTIME_ENVIRONMENTS_API_KEY: ${staging_runtime_environments_api_key}
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
AWS_SESSION_TOKEN: ${AWS_SESSION_TOKEN}

command: bash scripts/setup-credentials.sh

setup-mongodb:
Expand Down
Loading