From 2ee73b7459069df1420cf4e9661b31dcd5cb5665 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Fri, 1 Sep 2023 12:07:29 +0300
Subject: [PATCH 01/40] athena-driver

---
 cli/cmd/runtime/start.go                      |   1 +
 go.mod                                        |  13 +-
 go.sum                                        |  13 +
 runtime/compilers/rillv1beta/connector.go     |   4 +
 runtime/drivers/athena/athena.go              | 303 ++++++++++++++++++
 .../duckdb/transporter/filestore_to_duckDB.go |   4 +-
 .../transporter/objectStore_to_duckDB.go      |   4 +-
 runtime/reconcilers/source.go                 |   4 +
 .../catalog/artifacts/yaml/objects.go         |  10 +
 .../catalog/migrator/sources/sources.go       |   4 +
 .../sources/modal/AddSourceModal.svelte       |   2 +-
 .../src/features/sources/modal/yupSchemas.ts  |  14 +
 12 files changed, 366 insertions(+), 10 deletions(-)
 create mode 100644 runtime/drivers/athena/athena.go

diff --git a/cli/cmd/runtime/start.go b/cli/cmd/runtime/start.go
index 6b539e74844..50092647d29 100644
--- a/cli/cmd/runtime/start.go
+++ b/cli/cmd/runtime/start.go
@@ -23,6 +23,7 @@ import (
 	"golang.org/x/sync/errgroup"
 
 	// Load connectors and reconcilers for runtime
+	_ "github.com/rilldata/rill/runtime/drivers/athena"
 	_ "github.com/rilldata/rill/runtime/drivers/bigquery"
 	_ "github.com/rilldata/rill/runtime/drivers/druid"
 	_ "github.com/rilldata/rill/runtime/drivers/duckdb"
diff --git a/go.mod b/go.mod
index 5061ba4d75d..d19bfa4eff4 100644
--- a/go.mod
+++ b/go.mod
@@ -86,7 +86,10 @@ require (
 	moul.io/zapfilter v1.7.0
 )
 
-require google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect
+require (
+	github.com/aws/aws-sdk-go-v2/service/athena v1.31.6 // indirect
+	google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect
+)
 
 require (
 	cloud.google.com/go v0.110.2
@@ -105,14 +108,14 @@ require (
 	github.com/andybalholm/brotli v1.0.5 // indirect
 	github.com/apache/arrow/go/v12 v12.0.0 // indirect
 	github.com/apache/thrift v0.18.1 // indirect
-	github.com/aws/aws-sdk-go-v2 v1.18.0 // indirect
+	github.com/aws/aws-sdk-go-v2 v1.21.0 // indirect
 	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect
 	github.com/aws/aws-sdk-go-v2/config v1.18.25 // indirect
 	github.com/aws/aws-sdk-go-v2/credentials v1.13.24 // indirect
 	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 // indirect
 	github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.67 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 // indirect
 	github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 // indirect
 	github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 // indirect
 	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 // indirect
@@ -123,7 +126,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 // indirect
 	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 // indirect
 	github.com/aws/aws-sdk-go-v2/service/sts v1.19.0 // indirect
-	github.com/aws/smithy-go v1.13.5 // indirect
+	github.com/aws/smithy-go v1.14.2 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
diff --git a/go.sum b/go.sum
index d7a794d9f43..d5058389b09 100644
--- a/go.sum
+++ b/go.sum
@@ -598,6 +598,8 @@ github.com/aws/aws-sdk-go-v2 v1.9.1/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVj
 github.com/aws/aws-sdk-go-v2 v1.17.4/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
 github.com/aws/aws-sdk-go-v2 v1.18.0 h1:882kkTpSFhdgYRKVZ/VCgf7sd0ru57p2JCxz4/oN5RY=
 github.com/aws/aws-sdk-go-v2 v1.18.0/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
+github.com/aws/aws-sdk-go-v2 v1.21.0 h1:gMT0IW+03wtYJhRqTVYn0wLzwdnK9sRMcxmtfGzRdJc=
+github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M=
 github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 h1:dK82zF6kkPeCo8J1e+tGx4JdvDIQzj7ygIoLg8WMuGs=
 github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10/go.mod h1:VeTZetY5KRJLuD/7fkQXMU6Mw7H5m/KP2J5Iy9osMno=
 github.com/aws/aws-sdk-go-v2/config v1.18.12/go.mod h1:J36fOhj1LQBr+O4hJCiT8FwVvieeoSGOtPuvhKlsNu8=
@@ -615,15 +617,21 @@ github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.67/go.mod h1:zQClPRIwQZfJl
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28/go.mod h1:3lwChorpIM/BhImY/hy+Z6jekmN92cXGPI1QJasVPYY=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 h1:kG5eQilShqmJbv11XL1VpyDbaEJzWxd4zRiCG30GSn4=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33/go.mod h1:7i0PF1ME/2eUPFcjkVIwq+DOygHEoK92t5cDqNgYbIw=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22/go.mod h1:EqK7gVrIGAHyZItrD1D8B0ilgwMD1GiWAmbU4u/JHNk=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 h1:vFQlirhuM8lLlpI7imKOMsjdQLuN9CPi+k44F/OFVsk=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27/go.mod h1:UrHnn3QV/d0pBZ6QBAEQcqFLf8FAzLmoUfPVIueOvoM=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35/go.mod h1:SJC1nEVVva1g3pHAIdCp7QsRIkMmLAgoDquQ9Rr8kYw=
 github.com/aws/aws-sdk-go-v2/internal/ini v1.3.29/go.mod h1:TwuqRBGzxjQJIwH16/fOZodwXt2Zxa9/cwJC5ke4j7s=
 github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34 h1:gGLG7yKaXG02/jBlg210R7VgQIotiQntNhsCFejawx8=
 github.com/aws/aws-sdk-go-v2/internal/ini v1.3.34/go.mod h1:Etz2dj6UHYuw+Xw830KfzCfWGMzqvUTCjUj5b76GVDc=
 github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.19/go.mod h1:8W88sW3PjamQpKFUQvHWWKay6ARsNvZnzU7+a4apubw=
 github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25 h1:AzwRi5OKKwo4QNqPf7TjeO+tK8AyOK3GVSwmRPo7/Cs=
 github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.25/go.mod h1:SUbB4wcbSEyCvqBxv/O/IBf93RbEze7U7OnoTlpPB+g=
+github.com/aws/aws-sdk-go-v2/service/athena v1.31.6 h1:EFaTu1rBt+KQglDeYRpP1PHot/6xlYzvouxm2aRmrG8=
+github.com/aws/aws-sdk-go-v2/service/athena v1.31.6/go.mod h1:DHafyhR8x70ANJZ2RkJx8oeJsfEBqaGwZ591vlihVFQ=
 github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.8.1/go.mod h1:CM+19rL1+4dFWnOQKwDc7H1KwXTz+h61oUSHyhV0b3o=
 github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11 h1:y2+VQzC6Zh2ojtV2LoC0MNwHWc6qXv/j2vrQtlftkdA=
 github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.11/go.mod h1:iV4q2hsqtNECrfmlXyord9u4zyuFEJX9eLgLpSPzWA8=
@@ -656,6 +664,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.19.0/go.mod h1:BgQOMsg8av8jset59jely
 github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=
 github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
 github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
+github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
+github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
 github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM=
 github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
 github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
@@ -1996,6 +2006,7 @@ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
 github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
 github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
+github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
 github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
 github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
 github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
@@ -3280,7 +3291,9 @@ modernc.org/cc/v3 v3.40.0/go.mod h1:/bTg4dnWkSXowUO6ssQKnOV0yMVxDYNIsIrzqTFDGH0=
 modernc.org/ccgo/v3 v3.16.13 h1:Mkgdzl46i5F/CNR/Kj80Ri59hC8TKAhZrYSaqvkwzUw=
 modernc.org/ccgo/v3 v3.16.13/go.mod h1:2Quk+5YgpImhPjv2Qsob1DnZ/4som1lJTodubIcoUkY=
 modernc.org/ccorpus v1.11.6 h1:J16RXiiqiCgua6+ZvQot4yUuUy8zxgqbqEEUuGPlISk=
+modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ=
 modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM=
+modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM=
 modernc.org/libc v1.22.6 h1:cbXU8R+A6aOjRuhsFh3nbDWXO/Hs4ClJRXYB11KmPDo=
 modernc.org/libc v1.22.6/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
 modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=
diff --git a/runtime/compilers/rillv1beta/connector.go b/runtime/compilers/rillv1beta/connector.go
index eeec8f9608f..9be6f3f873b 100644
--- a/runtime/compilers/rillv1beta/connector.go
+++ b/runtime/compilers/rillv1beta/connector.go
@@ -184,6 +184,10 @@ func source(connector string, src *runtimev1.Source) drivers.Source {
 		return &drivers.DatabaseSource{
 			Props: props,
 		}
+	case "athena":
+		return &drivers.BucketSource{
+			Properties: props,
+		}
 	default:
 		return nil
 	}
diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
new file mode 100644
index 00000000000..1f4fa365364
--- /dev/null
+++ b/runtime/drivers/athena/athena.go
@@ -0,0 +1,303 @@
+package athena
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/service/athena"
+	"github.com/aws/aws-sdk-go-v2/service/athena/types"
+	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/aws/aws-sdk-go/aws/awserr"
+	"github.com/eapache/go-resiliency/retrier"
+	"github.com/mitchellh/mapstructure"
+	runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
+	"github.com/rilldata/rill/runtime/drivers"
+	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
+	"go.uber.org/zap"
+	"gocloud.dev/blob"
+	"gocloud.dev/blob/s3blob"
+)
+
+const defaultPageSize = 20
+
+func init() {
+	drivers.Register("athena", driver{})
+	drivers.RegisterAsConnector("athena", driver{})
+}
+
+var spec = drivers.Spec{
+	DisplayName:        "Amazon Athena",
+	Description:        "Connect to Amazon Athena database.",
+	ServiceAccountDocs: "",
+	SourceProperties: []drivers.PropertySchema{
+		{
+			Key:         "sql",
+			Type:        drivers.StringPropertyType,
+			Required:    true,
+			DisplayName: "SQL",
+			Description: "Query to extract data from Athena.",
+			Placeholder: "select * from catalog.table;",
+		},
+		{
+			Key:         "output.location",
+			DisplayName: "Output location",
+			Description: "Oputut location for query results in S3.",
+			Placeholder: "s3://bucket-name/path/",
+			Type:        drivers.StringPropertyType,
+			Required:    true,
+		},
+		{
+			Key:         "profile.name",
+			DisplayName: "AWS profile",
+			Description: "AWS profile for credentials.",
+			Type:        drivers.StringPropertyType,
+			Required:    true,
+		},
+	},
+	ConfigProperties: []drivers.PropertySchema{},
+}
+
+type driver struct{}
+
+type configProperties struct {
+	// SecretJSON      string `mapstructure:"google_application_credentials"`
+	// AllowHostAccess bool   `mapstructure:"allow_host_access"`
+}
+
+func (d driver) Open(config map[string]any, shared bool, logger *zap.Logger) (drivers.Handle, error) {
+	if shared {
+		return nil, fmt.Errorf("athena driver can't be shared")
+	}
+	conf := &configProperties{}
+	err := mapstructure.Decode(config, conf)
+	if err != nil {
+		return nil, err
+	}
+
+	conn := &Connection{
+		config: conf,
+		logger: logger,
+	}
+	return conn, nil
+}
+
+func (d driver) Drop(config map[string]any, logger *zap.Logger) error {
+	return drivers.ErrDropNotSupported
+}
+
+func (d driver) Spec() drivers.Spec {
+	return spec
+}
+
+func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) {
+	return false, fmt.Errorf("not implemented")
+}
+
+type sourceProperties struct {
+	SQL            string `mapstructure:"sql"`
+	OutputLocation string `mapstructure:"output.location"`
+	ProfileName    string `mapstructure:"profile.name"`
+}
+
+func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
+	fmt.Println(props)
+	conf := &sourceProperties{}
+	err := mapstructure.Decode(props, conf)
+	if err != nil {
+		return nil, err
+	}
+
+	return conf, nil
+}
+
+type Connection struct {
+	config *configProperties
+	logger *zap.Logger
+}
+
+var _ drivers.Handle = &Connection{}
+
+// Driver implements drivers.Connection.
+func (c *Connection) Driver() string {
+	return "athena"
+}
+
+// Config implements drivers.Connection.
+func (c *Connection) Config() map[string]any {
+	m := make(map[string]any, 0)
+	_ = mapstructure.Decode(c.config, m)
+	return m
+}
+
+// Close implements drivers.Connection.
+func (c *Connection) Close() error {
+	return nil
+}
+
+// AsRegistry implements drivers.Handle.
+func (c *Connection) AsRegistry() (drivers.RegistryStore, bool) {
+	return nil, false
+}
+
+// AsCatalogStore implements drivers.Handle.
+func (c *Connection) AsCatalogStore(instanceID string) (drivers.CatalogStore, bool) {
+	return nil, false
+}
+
+// AsRepoStore implements drivers.Handle.
+func (c *Connection) AsRepoStore(instanceID string) (drivers.RepoStore, bool) {
+	return nil, false
+}
+
+// AsOLAP implements drivers.Handle.
+func (c *Connection) AsOLAP(instanceID string) (drivers.OLAPStore, bool) {
+	return nil, false
+}
+
+// Migrate implements drivers.Connection.
+func (c *Connection) Migrate(ctx context.Context) (err error) {
+	return nil
+}
+
+// MigrationStatus implements drivers.Connection.
+func (c *Connection) MigrationStatus(ctx context.Context) (current, desired int, err error) {
+	return 0, 0, nil
+}
+
+// AsObjectStore implements drivers.Connection.
+func (c *Connection) AsObjectStore() (drivers.ObjectStore, bool) {
+	return c, true
+}
+
+// AsTransporter implements drivers.Connection.
+func (c *Connection) AsTransporter(from, to drivers.Handle) (drivers.Transporter, bool) {
+	return nil, false
+}
+
+func (c *Connection) AsFileStore() (drivers.FileStore, bool) {
+	return nil, false
+}
+
+// AsSQLStore implements drivers.Connection.
+func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) {
+	return nil, false
+}
+
+// DownloadFiles runs the source's SQL through an Athena UNLOAD (Parquet format) into the S3 output location
+// given by the `output.location` source property and returns a file iterator over objects in that location.
+// AWS credentials are loaded from the shared config profile named by the `profile.name` source property.
+func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSource) (drivers.FileIterator, error) {
+	conf, err := parseSourceProperties(source.Properties)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse config: %w", err)
+	}
+
+	err = c.unload(ctx, conf)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unload: %w", err)
+	}
+
+	path := conf.OutputLocation + "/parquet_output"
+	bucketObj, err := c.openBucket(ctx, conf, path)
+	if err != nil {
+		return nil, fmt.Errorf("cannot open bucket: %w", err)
+	}
+
+	opts := rillblob.Options{
+		ExtractPolicy: &runtimev1.Source_ExtractPolicy{
+			// FilesStrategy: runtimev1.Source_ExtractPolicy_STRATEGY_HEAD,
+		},
+		GlobPattern: "**/*",
+	}
+
+	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
+	if err != nil {
+		// TODO :: fix this for single file access. for single file first call only happens during download
+		var failureErr awserr.RequestFailure
+		if !errors.As(err, &failureErr) {
+			return nil, fmt.Errorf("failed to create the iterator %w", err)
+		}
+
+		// check again
+		if errors.As(err, &failureErr) && (failureErr.StatusCode() == http.StatusForbidden || failureErr.StatusCode() == http.StatusBadRequest) {
+			return nil, drivers.NewPermissionDeniedError(fmt.Sprintf("can't access remote err: %v", failureErr))
+		}
+	}
+
+	return it, err
+}
+
+func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
+	cfg, err := config.LoadDefaultConfig(context.TODO(), func(o *config.LoadOptions) error {
+		// o.Region = conf.Region
+		return nil
+	}, config.WithSharedConfigProfile(conf.ProfileName))
+	if err != nil {
+		return nil, err
+	}
+
+	s3client := s3v2.NewFromConfig(cfg)
+	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
+}
+
+func (c *Connection) unload(ctx context.Context, conf *sourceProperties) error {
+	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, conf.OutputLocation+"/parquet_output/") // todo create folder
+
+	cfg, err := config.LoadDefaultConfig(context.TODO(), func(o *config.LoadOptions) error {
+		// o.Region = "us-east-2"
+		return nil
+	}, config.WithSharedConfigProfile(conf.ProfileName))
+	fmt.Println("Executing : ", conf.ProfileName)
+
+	if err != nil {
+		return err
+	}
+
+	client := athena.NewFromConfig(cfg)
+
+	resultConfig := &types.ResultConfiguration{
+		OutputLocation: aws.String(conf.OutputLocation + "/output/"),
+	}
+
+	executeParams := &athena.StartQueryExecutionInput{
+		QueryString:         aws.String(finalSQL),
+		ResultConfiguration: resultConfig,
+	}
+
+	// Start Query Execution
+	athenaExecution, err := client.StartQueryExecution(ctx, executeParams)
+
+	if err != nil {
+		return err
+	}
+
+	// Get Query execution and check for the Query state constantly every 2 second
+	executionID := *athenaExecution.QueryExecutionId
+
+	r := retrier.New(retrier.LimitedExponentialBackoff(10, 100*time.Millisecond, 1*time.Second), nil) // 100 200 400 800 1000 1000 1000 1000 1000 1000
+
+	return r.Run(func() error {
+		status, stateErr := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
+			QueryExecutionId: &executionID,
+		})
+
+		if stateErr != nil {
+			return stateErr
+		}
+
+		state := status.QueryExecution.Status.State
+
+		if state == types.QueryExecutionStateSucceeded || state == types.QueryExecutionStateCancelled {
+			return nil
+		} else if state == types.QueryExecutionStateFailed {
+			return fmt.Errorf("Athen query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
+		}
+		return nil
+	})
+}
diff --git a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
index c5b8683da5f..e1da3b7ca40 100644
--- a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
@@ -28,11 +28,11 @@ var _ drivers.Transporter = &fileStoreToDuckDB{}
 func (t *fileStoreToDuckDB) Transfer(ctx context.Context, source drivers.Source, sink drivers.Sink, opts *drivers.TransferOpts, p drivers.Progress) error {
 	src, ok := source.FileSource()
 	if !ok {
-		return fmt.Errorf("type of source should `drivers.FilesSource`")
+		return fmt.Errorf("type of source should be `drivers.FilesSource`")
 	}
 	fSink, ok := sink.DatabaseSink()
 	if !ok {
-		return fmt.Errorf("type of source should `drivers.DatabaseSink`")
+		return fmt.Errorf("type of source should be `drivers.DatabaseSink`")
 	}
 
 	localPaths, err := t.from.FilePaths(ctx, src)
diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index 06945d4da8a..c96e8bd309b 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -33,11 +33,11 @@ func NewObjectStoreToDuckDB(from drivers.ObjectStore, to drivers.OLAPStore, logg
 func (t *objectStoreToDuckDB) Transfer(ctx context.Context, source drivers.Source, sink drivers.Sink, opts *drivers.TransferOpts, p drivers.Progress) error {
 	src, ok := source.BucketSource()
 	if !ok {
-		return fmt.Errorf("type of source should `drivers.BucketSource`")
+		return fmt.Errorf("type of source should be `drivers.BucketSource`")
 	}
 	dbSink, ok := sink.DatabaseSink()
 	if !ok {
-		return fmt.Errorf("type of source should `drivers.DatabaseSink`")
+		return fmt.Errorf("type of source should be `drivers.DatabaseSink`")
 	}
 
 	iterator, err := t.from.DownloadFiles(ctx, src)
diff --git a/runtime/reconcilers/source.go b/runtime/reconcilers/source.go
index 2c8f9632720..8a62c82a0ee 100644
--- a/runtime/reconcilers/source.go
+++ b/runtime/reconcilers/source.go
@@ -415,6 +415,10 @@ func driversSource(conn drivers.Handle, propsPB *structpb.Struct) (drivers.Sourc
 			SQL:   query,
 			Props: props,
 		}, nil
+	case "athena":
+		return &drivers.BucketSource{
+			Properties: props,
+		}, nil
 	default:
 		return nil, fmt.Errorf("source connector %q not supported", conn.Driver())
 	}
diff --git a/runtime/services/catalog/artifacts/yaml/objects.go b/runtime/services/catalog/artifacts/yaml/objects.go
index a67775089da..867a09d68ae 100644
--- a/runtime/services/catalog/artifacts/yaml/objects.go
+++ b/runtime/services/catalog/artifacts/yaml/objects.go
@@ -45,6 +45,8 @@ type Source struct {
 	SQL                   string         `yaml:"sql,omitempty" mapstructure:"sql,omitempty"`
 	DB                    string         `yaml:"db,omitempty" mapstructure:"db,omitempty"`
 	ProjectID             string         `yaml:"project_id,omitempty" mapstructure:"project_id,omitempty"`
+	AthenaOutputLocation  string         `yaml:"output.location,omitempty" mapstructure:"output.location,omitempty"`
+	AthenaProfileName     string         `yaml:"profile.name,omitempty" mapstructure:"profile.name,omitempty"`
 }
 
 type ExtractPolicy struct {
@@ -239,6 +241,14 @@ func fromSourceArtifact(source *Source, path string) (*drivers.CatalogEntry, err
 		props["project_id"] = source.ProjectID
 	}
 
+	if source.AthenaOutputLocation != "" {
+		props["output.location"] = source.AthenaOutputLocation
+	}
+
+	if source.AthenaProfileName != "" {
+		props["profile.name"] = source.AthenaProfileName
+	}
+
 	propsPB, err := structpb.NewStruct(props)
 	if err != nil {
 		return nil, err
diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go
index f5c32984e92..39a5b0d31d4 100644
--- a/runtime/services/catalog/migrator/sources/sources.go
+++ b/runtime/services/catalog/migrator/sources/sources.go
@@ -402,6 +402,10 @@ func source(connector string, src *runtimev1.Source) (drivers.Source, error) {
 			SQL:   query,
 			Props: props,
 		}, nil
+	case "athena":
+		return &drivers.BucketSource{
+			Properties: props,
+		}, nil
 	default:
 		return nil, fmt.Errorf("connector %v not supported", connector)
 	}
diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte
index d85e7ba84d3..2d566b9118d 100644
--- a/web-common/src/features/sources/modal/AddSourceModal.svelte
+++ b/web-common/src/features/sources/modal/AddSourceModal.svelte
@@ -91,7 +91,7 @@
     </TabGroup>
   </div>
   <div class="flex-grow overflow-y-auto">
-    {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "bigquery"}
+    {#if selectedConnector?.name === "gcs" || selectedConnector?.name === "s3" || selectedConnector?.name === "https" || selectedConnector?.name === "motherduck" || selectedConnector?.name === "bigquery" || selectedConnector?.name === "athena"}
       {#key selectedConnector}
         <RemoteSourceForm connector={selectedConnector} on:close />
       {/key}
diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts
index 3eb7b57be8e..45412cd33a2 100644
--- a/web-common/src/features/sources/modal/yupSchemas.ts
+++ b/web-common/src/features/sources/modal/yupSchemas.ts
@@ -69,6 +69,20 @@ export function getYupSchema(connector: V1ConnectorSpec) {
           .required("Source name is required"),
         project_id: yup.string().required("project_id is required"),
       });
+    case "athena":
+      return yup.object().shape({
+        sql: yup.string().required("sql is required"),
+        sourceName: yup
+          .string()
+          .matches(
+            /^[a-zA-Z_][a-zA-Z0-9_]*$/,
+            "Source name must start with a letter or underscore and contain only letters, numbers, and underscores"
+          )
+          .required("Source name is required"),
+        output_location: yup.string().required(),
+        profile_name: yup.string().required(),
+      });
+
     default:
       throw new Error(`Unknown connector: ${connector.name}`);
   }

From 328f2aee561ae40421a08344fb37d647752c8c30 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Fri, 1 Sep 2023 16:26:50 +0300
Subject: [PATCH 02/40] athena-driver

---
 runtime/drivers/athena/athena.go              | 38 +++++++++----------
 .../duckdb/transporter/filestore_to_duckDB.go |  2 +-
 .../transporter/objectStore_to_duckDB.go      | 18 ++++++---
 runtime/drivers/duckdb/transporter/utils.go   |  4 +-
 4 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 1f4fa365364..d918701e26c 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"net/http"
+	"strings"
 	"time"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
@@ -15,6 +16,7 @@ import (
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
 	"github.com/aws/aws-sdk-go/aws/awserr"
 	"github.com/eapache/go-resiliency/retrier"
+	"github.com/google/uuid"
 	"github.com/mitchellh/mapstructure"
 	runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
 	"github.com/rilldata/rill/runtime/drivers"
@@ -48,7 +50,7 @@ var spec = drivers.Spec{
 			Key:         "output.location",
 			DisplayName: "Output location",
 			Description: "Oputut location for query results in S3.",
-			Placeholder: "s3://bucket-name/path/",
+			Placeholder: "bucket-name",
 			Type:        drivers.StringPropertyType,
 			Required:    true,
 		},
@@ -106,7 +108,6 @@ type sourceProperties struct {
 }
 
 func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
-	fmt.Println(props)
 	conf := &sourceProperties{}
 	err := mapstructure.Decode(props, conf)
 	if err != nil {
@@ -198,22 +199,24 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	err = c.unload(ctx, conf)
+	prefix := "parquet_output_" + uuid.New().String()
+	bucketName := strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://")
+	unloadPath := bucketName + "/" + prefix
+	err = c.unload(ctx, conf, "s3://"+unloadPath)
 	if err != nil {
 		return nil, fmt.Errorf("failed to unload: %w", err)
 	}
 
-	path := conf.OutputLocation + "/parquet_output"
-	bucketObj, err := c.openBucket(ctx, conf, path)
+	bucketObj, err := c.openBucket(ctx, conf, bucketName)
 	if err != nil {
-		return nil, fmt.Errorf("cannot open bucket: %w", err)
+		return nil, fmt.Errorf("cannot open bucket %q: %w", unloadPath, err)
 	}
 
 	opts := rillblob.Options{
 		ExtractPolicy: &runtimev1.Source_ExtractPolicy{
 			// FilesStrategy: runtimev1.Source_ExtractPolicy_STRATEGY_HEAD,
 		},
-		GlobPattern: "**/*",
+		GlobPattern: prefix + "/**",
 	}
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
@@ -221,7 +224,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		// TODO :: fix this for single file access. for single file first call only happens during download
 		var failureErr awserr.RequestFailure
 		if !errors.As(err, &failureErr) {
-			return nil, fmt.Errorf("failed to create the iterator %w", err)
+			return nil, fmt.Errorf("failed to create the iterator %q %w", unloadPath, err)
 		}
 
 		// check again
@@ -246,15 +249,10 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
 }
 
-func (c *Connection) unload(ctx context.Context, conf *sourceProperties) error {
-	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, conf.OutputLocation+"/parquet_output/") // todo create folder
-
-	cfg, err := config.LoadDefaultConfig(context.TODO(), func(o *config.LoadOptions) error {
-		// o.Region = "us-east-2"
-		return nil
-	}, config.WithSharedConfigProfile(conf.ProfileName))
-	fmt.Println("Executing : ", conf.ProfileName)
+func (c *Connection) unload(ctx context.Context, conf *sourceProperties, path string) error {
+	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
 
+	cfg, err := config.LoadDefaultConfig(context.TODO(), config.WithSharedConfigProfile(conf.ProfileName))
 	if err != nil {
 		return err
 	}
@@ -262,7 +260,7 @@ func (c *Connection) unload(ctx context.Context, conf *sourceProperties) error {
 	client := athena.NewFromConfig(cfg)
 
 	resultConfig := &types.ResultConfiguration{
-		OutputLocation: aws.String(conf.OutputLocation + "/output/"),
+		OutputLocation: aws.String("s3://" + strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://") + "/output/"),
 	}
 
 	executeParams := &athena.StartQueryExecutionInput{
@@ -280,7 +278,7 @@ func (c *Connection) unload(ctx context.Context, conf *sourceProperties) error {
 	// Get Query execution and check for the Query state constantly every 2 second
 	executionID := *athenaExecution.QueryExecutionId
 
-	r := retrier.New(retrier.LimitedExponentialBackoff(10, 100*time.Millisecond, 1*time.Second), nil) // 100 200 400 800 1000 1000 1000 1000 1000 1000
+	r := retrier.New(retrier.LimitedExponentialBackoff(20, 100*time.Millisecond, 1*time.Second), nil) // 100 200 400 800 1000 1000 1000 1000 1000 1000 ... < 20 sec
 
 	return r.Run(func() error {
 		status, stateErr := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
@@ -296,8 +294,8 @@ func (c *Connection) unload(ctx context.Context, conf *sourceProperties) error {
 		if state == types.QueryExecutionStateSucceeded || state == types.QueryExecutionStateCancelled {
 			return nil
 		} else if state == types.QueryExecutionStateFailed {
-			return fmt.Errorf("Athen query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
+			return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
 		}
-		return nil
+		return fmt.Errorf("Execution is not completed yet, current state: %s", state)
 	})
 }
diff --git a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
index e1da3b7ca40..866e2d0e7d7 100644
--- a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
@@ -65,7 +65,7 @@ func (t *fileStoreToDuckDB) Transfer(ctx context.Context, source drivers.Source,
 	}
 
 	// Ingest data
-	from, err := sourceReader(localPaths, format, ingestionProps)
+	from, err := sourceReader(localPaths, format, ingestionProps, false)
 	if err != nil {
 		return err
 	}
diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index c96e8bd309b..3d77130dcf2 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -4,10 +4,12 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"reflect"
 	"strings"
 	"time"
 
 	"github.com/rilldata/rill/runtime/drivers"
+	"github.com/rilldata/rill/runtime/drivers/athena"
 	"github.com/rilldata/rill/runtime/pkg/duckdbsql"
 	"github.com/rilldata/rill/runtime/pkg/fileutil"
 	"github.com/rilldata/rill/runtime/pkg/observability"
@@ -51,9 +53,10 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, source drivers.Sourc
 		return drivers.ErrIngestionLimitExceeded
 	}
 
+	fromAthena := reflect.TypeOf(t.from).AssignableTo(reflect.TypeOf(&athena.Connection{}))
 	sql, hasSQL := src.Properties["sql"].(string)
 	// if sql is specified use ast rewrite to fill in the downloaded files
-	if hasSQL {
+	if hasSQL && !fromAthena {
 		return t.ingestDuckDBSQL(ctx, sql, iterator, dbSink, opts, p)
 	}
 
@@ -63,6 +66,9 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, source drivers.Sourc
 	val, formatDefined := src.Properties["format"].(string)
 	if formatDefined {
 		format = fmt.Sprintf(".%s", val)
+	} else if fromAthena {
+		format = "parquet"
+		formatDefined = true
 	}
 
 	allowSchemaRelaxation, err := schemaRelaxationProperty(src.Properties)
@@ -97,11 +103,13 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, source drivers.Sourc
 		st := time.Now()
 		t.logger.Info("ingesting files", zap.Strings("files", files), observability.ZapCtx(ctx))
 		if appendToTable {
-			if err := a.appendData(ctx, files, format); err != nil {
+			if err := a.appendData(ctx, files, format, fromAthena); err != nil {
 				return err
 			}
 		} else {
-			from, err := sourceReader(files, format, ingestionProps)
+			var from string
+			var err error
+			from, err = sourceReader(files, format, ingestionProps, fromAthena)
 			if err != nil {
 				return err
 			}
@@ -142,8 +150,8 @@ func newAppender(to drivers.OLAPStore, sink *drivers.DatabaseSink, ingestionProp
 	}
 }
 
-func (a *appender) appendData(ctx context.Context, files []string, format string) error {
-	from, err := sourceReader(files, format, a.ingestionProps)
+func (a *appender) appendData(ctx context.Context, files []string, format string, fromAthena bool) error {
+	from, err := sourceReader(files, format, a.ingestionProps, fromAthena)
 	if err != nil {
 		return err
 	}
diff --git a/runtime/drivers/duckdb/transporter/utils.go b/runtime/drivers/duckdb/transporter/utils.go
index c6f9851b71e..3a3aee13233 100644
--- a/runtime/drivers/duckdb/transporter/utils.go
+++ b/runtime/drivers/duckdb/transporter/utils.go
@@ -27,12 +27,12 @@ func rawConn(conn *sql.Conn, f func(driver.Conn) error) error {
 	})
 }
 
-func sourceReader(paths []string, format string, ingestionProps map[string]any) (string, error) {
+func sourceReader(paths []string, format string, ingestionProps map[string]any, fromAthena bool) (string, error) {
 	// Generate a "read" statement
 	if containsAny(format, []string{".csv", ".tsv", ".txt"}) {
 		// CSV reader
 		return generateReadCsvStatement(paths, ingestionProps)
-	} else if strings.Contains(format, ".parquet") {
+	} else if strings.Contains(format, ".parquet") || fromAthena {
 		// Parquet reader
 		return generateReadParquetStatement(paths, ingestionProps)
 	} else if containsAny(format, []string{".json", ".ndjson"}) {

From a047f82cbbfd19043d82803599169f4b28ad659e Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Mon, 4 Sep 2023 12:22:13 +0300
Subject: [PATCH 03/40] athena-driver

---
 runtime/drivers/athena/athena.go | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index d918701e26c..c4f2f1620fa 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -10,7 +10,7 @@ import (
 
 	"github.com/aws/aws-sdk-go-v2/aws"
 
-	"github.com/aws/aws-sdk-go-v2/config"
+	awsconfig "github.com/aws/aws-sdk-go-v2/config"
 	"github.com/aws/aws-sdk-go-v2/service/athena"
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
@@ -26,8 +26,6 @@ import (
 	"gocloud.dev/blob/s3blob"
 )
 
-const defaultPageSize = 20
-
 func init() {
 	drivers.Register("athena", driver{})
 	drivers.RegisterAsConnector("athena", driver{})
@@ -237,10 +235,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := config.LoadDefaultConfig(context.TODO(), func(o *config.LoadOptions) error {
-		// o.Region = conf.Region
-		return nil
-	}, config.WithSharedConfigProfile(conf.ProfileName))
+	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), awsconfig.WithSharedConfigProfile(conf.ProfileName))
 	if err != nil {
 		return nil, err
 	}
@@ -252,7 +247,7 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 func (c *Connection) unload(ctx context.Context, conf *sourceProperties, path string) error {
 	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
 
-	cfg, err := config.LoadDefaultConfig(context.TODO(), config.WithSharedConfigProfile(conf.ProfileName))
+	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), awsconfig.WithSharedConfigProfile(conf.ProfileName))
 	if err != nil {
 		return err
 	}

From dbba228ce80b1c6d3ce4448afb8facdddfa16cd7 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Mon, 4 Sep 2023 12:50:01 +0300
Subject: [PATCH 04/40] athena-driver

---
 runtime/drivers/athena/athena.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index c4f2f1620fa..1b2c95d42c2 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -9,7 +9,6 @@ import (
 	"time"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
-
 	awsconfig "github.com/aws/aws-sdk-go-v2/config"
 	"github.com/aws/aws-sdk-go-v2/service/athena"
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
@@ -265,7 +264,6 @@ func (c *Connection) unload(ctx context.Context, conf *sourceProperties, path st
 
 	// Start Query Execution
 	athenaExecution, err := client.StartQueryExecution(ctx, executeParams)
-
 	if err != nil {
 		return err
 	}

From 985f1955489f8a527de2a2a5287f1936b6e9891f Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 16:01:07 +0300
Subject: [PATCH 05/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 1b2c95d42c2..e65e860d61f 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -262,24 +262,20 @@ func (c *Connection) unload(ctx context.Context, conf *sourceProperties, path st
 		ResultConfiguration: resultConfig,
 	}
 
-	// Start Query Execution
 	athenaExecution, err := client.StartQueryExecution(ctx, executeParams)
 	if err != nil {
 		return err
 	}
 
-	// Get Query execution and check for the Query state constantly every 2 second
-	executionID := *athenaExecution.QueryExecutionId
+	r := retrier.New(retrier.ConstantBackoff(20, 1*time.Second), nil)
 
-	r := retrier.New(retrier.LimitedExponentialBackoff(20, 100*time.Millisecond, 1*time.Second), nil) // 100 200 400 800 1000 1000 1000 1000 1000 1000 ... < 20 sec
-
-	return r.Run(func() error {
-		status, stateErr := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
-			QueryExecutionId: &executionID,
+	return r.RunCtx(ctx, func(ctx context.Context) error {
+		status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
+			QueryExecutionId: athenaExecution.QueryExecutionId,
 		})
 
-		if stateErr != nil {
-			return stateErr
+		if err != nil {
+			return err
 		}
 
 		state := status.QueryExecution.Status.State

From 3db7655b976a7b7040cc131c5617261c1c8a5996 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 18:48:43 +0300
Subject: [PATCH 06/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 63 ++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index e65e860d61f..e47c480d52c 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -13,6 +13,8 @@ import (
 	"github.com/aws/aws-sdk-go-v2/service/athena"
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
+	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
+
 	"github.com/aws/aws-sdk-go/aws/awserr"
 	"github.com/eapache/go-resiliency/retrier"
 	"github.com/google/uuid"
@@ -187,46 +189,68 @@ func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) {
 	return nil, false
 }
 
-// DownloadFiles returns a file iterator over objects stored in gcs.
-// The credential json is read from config google_application_credentials.
-// Additionally in case `allow_host_credentials` is true it looks for "Application Default Credentials" as well
+func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) error {
+	s3client := s3v2.NewFromConfig(cfg)
+	out, err := s3client.ListObjectsV2(ctx, &s3v2.ListObjectsV2Input{
+		Bucket: &bucketName,
+		Prefix: &prefix,
+	})
+	if err != nil {
+		return err
+	}
+
+	ids := make([]s3v2types.ObjectIdentifier, 0, len(out.Contents))
+	for _, o := range out.Contents {
+		ids = append(ids, s3v2types.ObjectIdentifier{
+			Key: o.Key,
+		})
+	}
+	_, err = s3client.DeleteObjects(ctx, &s3v2.DeleteObjectsInput{
+		Delete: &s3v2types.Delete{
+			Objects: ids,
+		},
+	})
+	return err
+}
+
 func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSource) (drivers.FileIterator, error) {
 	conf, err := parseSourceProperties(source.Properties)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
+	cfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithSharedConfigProfile(conf.ProfileName))
+	if err != nil {
+		return nil, err
+	}
+
 	prefix := "parquet_output_" + uuid.New().String()
 	bucketName := strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://")
 	unloadPath := bucketName + "/" + prefix
-	err = c.unload(ctx, conf, "s3://"+unloadPath)
+	err = c.unload(ctx, cfg, conf, "s3://"+unloadPath)
 	if err != nil {
-		return nil, fmt.Errorf("failed to unload: %w", err)
+		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, prefix))
 	}
 
 	bucketObj, err := c.openBucket(ctx, conf, bucketName)
 	if err != nil {
-		return nil, fmt.Errorf("cannot open bucket %q: %w", unloadPath, err)
+		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", unloadPath, err), cleanPath(ctx, cfg, bucketName, prefix))
 	}
 
 	opts := rillblob.Options{
-		ExtractPolicy: &runtimev1.Source_ExtractPolicy{
-			// FilesStrategy: runtimev1.Source_ExtractPolicy_STRATEGY_HEAD,
-		},
-		GlobPattern: prefix + "/**",
+		ExtractPolicy: &runtimev1.Source_ExtractPolicy{},
+		GlobPattern:   prefix + "/**",
 	}
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		// TODO :: fix this for single file access. for single file first call only happens during download
 		var failureErr awserr.RequestFailure
 		if !errors.As(err, &failureErr) {
-			return nil, fmt.Errorf("failed to create the iterator %q %w", unloadPath, err)
+			return nil, errors.Join(fmt.Errorf("failed to create the iterator %q %w", unloadPath, err), cleanPath(ctx, cfg, bucketName, prefix))
 		}
 
-		// check again
 		if errors.As(err, &failureErr) && (failureErr.StatusCode() == http.StatusForbidden || failureErr.StatusCode() == http.StatusBadRequest) {
-			return nil, drivers.NewPermissionDeniedError(fmt.Sprintf("can't access remote err: %v", failureErr))
+			return nil, errors.Join(drivers.NewPermissionDeniedError(fmt.Sprintf("can't access remote err: %v", failureErr)), cleanPath(ctx, cfg, bucketName, prefix))
 		}
 	}
 
@@ -234,7 +258,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), awsconfig.WithSharedConfigProfile(conf.ProfileName))
+	cfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithSharedConfigProfile(conf.ProfileName))
 	if err != nil {
 		return nil, err
 	}
@@ -243,16 +267,9 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
 }
 
-func (c *Connection) unload(ctx context.Context, conf *sourceProperties, path string) error {
+func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourceProperties, path string) error {
 	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
-
-	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), awsconfig.WithSharedConfigProfile(conf.ProfileName))
-	if err != nil {
-		return err
-	}
-
 	client := athena.NewFromConfig(cfg)
-
 	resultConfig := &types.ResultConfiguration{
 		OutputLocation: aws.String("s3://" + strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://") + "/output/"),
 	}

From 5c66737b2211bb96289c98386523be75a9658b21 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 18:51:26 +0300
Subject: [PATCH 07/40] athena-driver review

---
 runtime/drivers/athena/athena.go                   | 8 ++++----
 runtime/services/catalog/artifacts/yaml/objects.go | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index e47c480d52c..50db075f588 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -46,7 +46,7 @@ var spec = drivers.Spec{
 			Placeholder: "select * from catalog.table;",
 		},
 		{
-			Key:         "output.location",
+			Key:         "output_location",
 			DisplayName: "Output location",
 			Description: "Oputut location for query results in S3.",
 			Placeholder: "bucket-name",
@@ -54,7 +54,7 @@ var spec = drivers.Spec{
 			Required:    true,
 		},
 		{
-			Key:         "profile.name",
+			Key:         "profile_name",
 			DisplayName: "AWS profile",
 			Description: "AWS profile for credentials.",
 			Type:        drivers.StringPropertyType,
@@ -102,8 +102,8 @@ func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source
 
 type sourceProperties struct {
 	SQL            string `mapstructure:"sql"`
-	OutputLocation string `mapstructure:"output.location"`
-	ProfileName    string `mapstructure:"profile.name"`
+	OutputLocation string `mapstructure:"output_location"`
+	ProfileName    string `mapstructure:"profile_name"`
 }
 
 func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
diff --git a/runtime/services/catalog/artifacts/yaml/objects.go b/runtime/services/catalog/artifacts/yaml/objects.go
index 867a09d68ae..fafaa0566ab 100644
--- a/runtime/services/catalog/artifacts/yaml/objects.go
+++ b/runtime/services/catalog/artifacts/yaml/objects.go
@@ -45,8 +45,8 @@ type Source struct {
 	SQL                   string         `yaml:"sql,omitempty" mapstructure:"sql,omitempty"`
 	DB                    string         `yaml:"db,omitempty" mapstructure:"db,omitempty"`
 	ProjectID             string         `yaml:"project_id,omitempty" mapstructure:"project_id,omitempty"`
-	AthenaOutputLocation  string         `yaml:"output.location,omitempty" mapstructure:"output.location,omitempty"`
-	AthenaProfileName     string         `yaml:"profile.name,omitempty" mapstructure:"profile.name,omitempty"`
+	AthenaOutputLocation  string         `yaml:"output_location,omitempty" mapstructure:"output_location,omitempty"`
+	AthenaProfileName     string         `yaml:"profile_name,omitempty" mapstructure:"profile_name,omitempty"`
 }
 
 type ExtractPolicy struct {
@@ -242,11 +242,11 @@ func fromSourceArtifact(source *Source, path string) (*drivers.CatalogEntry, err
 	}
 
 	if source.AthenaOutputLocation != "" {
-		props["output.location"] = source.AthenaOutputLocation
+		props["output_location"] = source.AthenaOutputLocation
 	}
 
 	if source.AthenaProfileName != "" {
-		props["profile.name"] = source.AthenaProfileName
+		props["profile_name"] = source.AthenaProfileName
 	}
 
 	propsPB, err := structpb.NewStruct(props)

From 605b7911c1126c25ad53191e9478b6278cd9211b Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 18:55:44 +0300
Subject: [PATCH 08/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 50db075f588..f8f4bf194d8 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -238,8 +238,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 	}
 
 	opts := rillblob.Options{
-		ExtractPolicy: &runtimev1.Source_ExtractPolicy{},
-		GlobPattern:   prefix + "/**",
+		GlobPattern: prefix + "/**",
 	}
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)

From 200815505e7174578bdbbac76cc5c2b2c0fbbef1 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 20:34:49 +0300
Subject: [PATCH 09/40] athena-driver review

---
 runtime/drivers/athena/athena.go              | 40 ++++++++++++++-----
 .../catalog/artifacts/yaml/objects.go         |  5 ---
 .../catalog/migrator/sources/sources.go       |  4 ++
 .../src/features/sources/modal/yupSchemas.ts  |  2 +-
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index f8f4bf194d8..7e8c0cb8f64 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/aws/aws-sdk-go-v2/aws"
 	awsconfig "github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/service/athena"
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
@@ -19,7 +20,6 @@ import (
 	"github.com/eapache/go-resiliency/retrier"
 	"github.com/google/uuid"
 	"github.com/mitchellh/mapstructure"
-	runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
 	"github.com/rilldata/rill/runtime/drivers"
 	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
 	"go.uber.org/zap"
@@ -47,28 +47,38 @@ var spec = drivers.Spec{
 		},
 		{
 			Key:         "output_location",
-			DisplayName: "Output location",
+			DisplayName: "S3 output location",
 			Description: "Oputut location for query results in S3.",
-			Placeholder: "bucket-name",
+			Placeholder: "mybucket",
 			Type:        drivers.StringPropertyType,
 			Required:    true,
 		},
 		{
-			Key:         "profile_name",
-			DisplayName: "AWS profile",
+			Key:         "region",
+			DisplayName: "AWS region",
 			Description: "AWS profile for credentials.",
 			Type:        drivers.StringPropertyType,
 			Required:    true,
 		},
 	},
-	ConfigProperties: []drivers.PropertySchema{},
+	ConfigProperties: []drivers.PropertySchema{
+		{
+			Key:    "aws_access_key_id",
+			Secret: true,
+		},
+		{
+			Key:    "aws_secret_access_key",
+			Secret: true,
+		},
+	},
 }
 
 type driver struct{}
 
 type configProperties struct {
-	// SecretJSON      string `mapstructure:"google_application_credentials"`
-	// AllowHostAccess bool   `mapstructure:"allow_host_access"`
+	AccessKeyID     string `mapstructure:"aws_access_key_id"`
+	SecretAccessKey string `mapstructure:"aws_secret_access_key"`
+	SessionToken    string `mapstructure:"aws_access_token"`
 }
 
 func (d driver) Open(config map[string]any, shared bool, logger *zap.Logger) (drivers.Handle, error) {
@@ -103,7 +113,7 @@ func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source
 type sourceProperties struct {
 	SQL            string `mapstructure:"sql"`
 	OutputLocation string `mapstructure:"output_location"`
-	ProfileName    string `mapstructure:"profile_name"`
+	Region    string `mapstructure:"region"`
 }
 
 func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
@@ -219,7 +229,11 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	cfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithSharedConfigProfile(conf.ProfileName))
+	cfg, err := awsconfig.LoadDefaultConfig(
+		ctx, 
+		awsconfig.WithRegion(conf.Region),
+		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken))
+	)
 	if err != nil {
 		return nil, err
 	}
@@ -257,7 +271,11 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithSharedConfigProfile(conf.ProfileName))
+	cfg, err := awsconfig.LoadDefaultConfig(
+		ctx, 
+		awsconfig.WithRegion(conf.Region),
+		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken))
+	)
 	if err != nil {
 		return nil, err
 	}
diff --git a/runtime/services/catalog/artifacts/yaml/objects.go b/runtime/services/catalog/artifacts/yaml/objects.go
index fafaa0566ab..271d9541d19 100644
--- a/runtime/services/catalog/artifacts/yaml/objects.go
+++ b/runtime/services/catalog/artifacts/yaml/objects.go
@@ -46,7 +46,6 @@ type Source struct {
 	DB                    string         `yaml:"db,omitempty" mapstructure:"db,omitempty"`
 	ProjectID             string         `yaml:"project_id,omitempty" mapstructure:"project_id,omitempty"`
 	AthenaOutputLocation  string         `yaml:"output_location,omitempty" mapstructure:"output_location,omitempty"`
-	AthenaProfileName     string         `yaml:"profile_name,omitempty" mapstructure:"profile_name,omitempty"`
 }
 
 type ExtractPolicy struct {
@@ -245,10 +244,6 @@ func fromSourceArtifact(source *Source, path string) (*drivers.CatalogEntry, err
 		props["output_location"] = source.AthenaOutputLocation
 	}
 
-	if source.AthenaProfileName != "" {
-		props["profile_name"] = source.AthenaProfileName
-	}
-
 	propsPB, err := structpb.NewStruct(props)
 	if err != nil {
 		return nil, err
diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go
index 39a5b0d31d4..b171ea305d5 100644
--- a/runtime/services/catalog/migrator/sources/sources.go
+++ b/runtime/services/catalog/migrator/sources/sources.go
@@ -432,6 +432,10 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s
 		vars["aws_access_key_id"] = env["aws_access_key_id"]
 		vars["aws_secret_access_key"] = env["aws_secret_access_key"]
 		vars["aws_session_token"] = env["aws_session_token"]
+	case "athena":
+		vars["aws_access_key_id"] = env["aws_access_key_id"]
+		vars["aws_secret_access_key"] = env["aws_secret_access_key"]
+		vars["aws_session_token"] = env["aws_session_token"]
 	case "gcs":
 		vars["google_application_credentials"] = env["google_application_credentials"]
 	case "motherduck":
diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts
index 45412cd33a2..3ef6d357edb 100644
--- a/web-common/src/features/sources/modal/yupSchemas.ts
+++ b/web-common/src/features/sources/modal/yupSchemas.ts
@@ -80,7 +80,7 @@ export function getYupSchema(connector: V1ConnectorSpec) {
           )
           .required("Source name is required"),
         output_location: yup.string().required(),
-        profile_name: yup.string().required(),
+        region: yup.string(),
       });
 
     default:

From 00d3ec05c81acd560c9cc61ce9735ff588edafce Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 5 Sep 2023 20:48:23 +0300
Subject: [PATCH 10/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 7e8c0cb8f64..d889d3a9f00 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -257,17 +257,10 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		var failureErr awserr.RequestFailure
-		if !errors.As(err, &failureErr) {
-			return nil, errors.Join(fmt.Errorf("failed to create the iterator %q %w", unloadPath, err), cleanPath(ctx, cfg, bucketName, prefix))
-		}
-
-		if errors.As(err, &failureErr) && (failureErr.StatusCode() == http.StatusForbidden || failureErr.StatusCode() == http.StatusBadRequest) {
-			return nil, errors.Join(drivers.NewPermissionDeniedError(fmt.Sprintf("can't access remote err: %v", failureErr)), cleanPath(ctx, cfg, bucketName, prefix))
-		}
+		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, prefix))
 	}
 
-	return it, err
+	return it 
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {

From dc520560cc97223c92160c5cab71c9a03287234a Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 6 Sep 2023 10:59:19 +0300
Subject: [PATCH 11/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index d889d3a9f00..cabc72c20a2 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -113,7 +113,7 @@ func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source
 type sourceProperties struct {
 	SQL            string `mapstructure:"sql"`
 	OutputLocation string `mapstructure:"output_location"`
-	Region    string `mapstructure:"region"`
+	Region         string `mapstructure:"region"`
 }
 
 func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
@@ -230,9 +230,9 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 	}
 
 	cfg, err := awsconfig.LoadDefaultConfig(
-		ctx, 
+		ctx,
 		awsconfig.WithRegion(conf.Region),
-		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken))
+		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
 	)
 	if err != nil {
 		return nil, err
@@ -260,14 +260,14 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, prefix))
 	}
 
-	return it 
+	return it, nil
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
 	cfg, err := awsconfig.LoadDefaultConfig(
-		ctx, 
+		ctx,
 		awsconfig.WithRegion(conf.Region),
-		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken))
+		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
 	)
 	if err != nil {
 		return nil, err

From d7e774b7a0f58050fafe4edfa40ff9619b4961a2 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 6 Sep 2023 12:08:27 +0300
Subject: [PATCH 12/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index cabc72c20a2..c934a78d4ff 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -4,7 +4,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"net/http"
 	"strings"
 	"time"
 
@@ -16,7 +15,6 @@ import (
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
 	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
 
-	"github.com/aws/aws-sdk-go/aws/awserr"
 	"github.com/eapache/go-resiliency/retrier"
 	"github.com/google/uuid"
 	"github.com/mitchellh/mapstructure"

From 29b816e18421b9dff8607406143e1478d461403b Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 6 Sep 2023 13:35:52 +0300
Subject: [PATCH 13/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 runtime/drivers/duckdb/transporter/utils.go | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/runtime/drivers/duckdb/transporter/utils.go b/runtime/drivers/duckdb/transporter/utils.go
index 32d21aeb857..ee609e148ec 100644
--- a/runtime/drivers/duckdb/transporter/utils.go
+++ b/runtime/drivers/duckdb/transporter/utils.go
@@ -7,24 +7,6 @@ import (
 	"strings"
 )
 
-// rawConn is similar to *sql.Conn.Raw, but additionally unwraps otelsql (which we use for instrumentation).
-func rawConn(conn *sql.Conn, f func(driver.Conn) error) error {
-	return conn.Raw(func(raw any) error {
-		// For details, see: https://github.com/XSAM/otelsql/issues/98
-		if c, ok := raw.(interface{ Raw() driver.Conn }); ok {
-			raw = c.Raw()
-		}
-
-		// This is currently guaranteed, but adding check to be safe
-		driverConn, ok := raw.(driver.Conn)
-		if !ok {
-			return fmt.Errorf("internal: did not obtain a driver.Conn")
-		}
-
-		return f(driverConn)
-	})
-}
-
 func sourceReader(paths []string, format string, ingestionProps map[string]any, fromAthena bool) (string, error) {
 	// Generate a "read" statement
 	if containsAny(format, []string{".csv", ".tsv", ".txt"}) {

From e5f1794b313f87e6c748c05d03b3b7475901c9bc Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 6 Sep 2023 13:43:00 +0300
Subject: [PATCH 14/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 runtime/drivers/athena/athena.go                         | 5 ++---
 runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index c934a78d4ff..ea3a12a7789 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -14,12 +14,12 @@ import (
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
 	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
-
 	"github.com/eapache/go-resiliency/retrier"
 	"github.com/google/uuid"
 	"github.com/mitchellh/mapstructure"
 	"github.com/rilldata/rill/runtime/drivers"
 	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
+	"github.com/rilldata/rill/runtime/pkg/activity"
 	"go.uber.org/zap"
 	"gocloud.dev/blob"
 	"gocloud.dev/blob/s3blob"
@@ -79,7 +79,7 @@ type configProperties struct {
 	SessionToken    string `mapstructure:"aws_access_token"`
 }
 
-func (d driver) Open(config map[string]any, shared bool, logger *zap.Logger) (drivers.Handle, error) {
+func (d driver) Open(config map[string]any, shared bool, client activity.Client, logger *zap.Logger) (drivers.Handle, error) {
 	if shared {
 		return nil, fmt.Errorf("athena driver can't be shared")
 	}
@@ -298,7 +298,6 @@ func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourcePro
 		status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
 			QueryExecutionId: athenaExecution.QueryExecutionId,
 		})
-
 		if err != nil {
 			return err
 		}
diff --git a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
index 0f295d1bfef..5d0de761bf6 100644
--- a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
@@ -62,7 +62,7 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, source drivers.Source,
 		}
 
 		format := fileutil.FullExt(files[0])
-		from, err := sourceReader(files, format, make(map[string]any))
+		from, err := sourceReader(files, format, make(map[string]any), false)
 		if err != nil {
 			return err
 		}

From 549cf675206da35a594f116bbb3a0d9a6d15877d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Egelund-M=C3=BCller?= <b@egelund-muller.com>
Date: Thu, 7 Sep 2023 11:19:59 +0200
Subject: [PATCH 15/40] Run go mod tidy

---
 go.mod | 14 ++++++--------
 go.sum |  7 -------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/go.mod b/go.mod
index 9d32dca8279..d23c70a8746 100644
--- a/go.mod
+++ b/go.mod
@@ -14,6 +14,7 @@ require (
 	github.com/apache/arrow/go/v13 v13.0.0
 	github.com/apache/calcite-avatica-go/v5 v5.2.0
 	github.com/aws/aws-sdk-go v1.44.268
+	github.com/aws/aws-sdk-go-v2/service/athena v1.31.6
 	github.com/benbjohnson/clock v1.3.5
 	github.com/bmatcuk/doublestar/v4 v4.6.0
 	github.com/bradleyfalzon/ghinstallation/v2 v2.4.0
@@ -86,10 +87,7 @@ require (
 	moul.io/zapfilter v1.7.0
 )
 
-require (
-	github.com/aws/aws-sdk-go-v2/service/athena v1.31.6 // indirect
-	google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect
-)
+require google.golang.org/genproto v0.0.0-20230530153820-e85fd2cbaebc // indirect
 
 require (
 	cloud.google.com/go v0.110.2 // indirect
@@ -107,10 +105,10 @@ require (
 	github.com/alicebob/gopher-json v0.0.0-20230218143504-906a9b012302 // indirect
 	github.com/andybalholm/brotli v1.0.5 // indirect
 	github.com/apache/thrift v0.18.1 // indirect
-	github.com/aws/aws-sdk-go-v2 v1.21.0 // indirect
+	github.com/aws/aws-sdk-go-v2 v1.21.0
 	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.10 // indirect
-	github.com/aws/aws-sdk-go-v2/config v1.18.25 // indirect
-	github.com/aws/aws-sdk-go-v2/credentials v1.13.24 // indirect
+	github.com/aws/aws-sdk-go-v2/config v1.18.25
+	github.com/aws/aws-sdk-go-v2/credentials v1.13.24
 	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.3 // indirect
 	github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.67 // indirect
 	github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 // indirect
@@ -121,7 +119,7 @@ require (
 	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.28 // indirect
 	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.27 // indirect
 	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.14.2 // indirect
-	github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1 // indirect
+	github.com/aws/aws-sdk-go-v2/service/s3 v1.33.1
 	github.com/aws/aws-sdk-go-v2/service/sso v1.12.10 // indirect
 	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.10 // indirect
 	github.com/aws/aws-sdk-go-v2/service/sts v1.19.0 // indirect
diff --git a/go.sum b/go.sum
index 1fd02bb6401..dd66628f700 100644
--- a/go.sum
+++ b/go.sum
@@ -592,7 +592,6 @@ github.com/aws/aws-sdk-go v1.44.268 h1:WoK20tlAvsvQzTcE6TajoprbXmTbcud6MjhErL4P/
 github.com/aws/aws-sdk-go v1.44.268/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
 github.com/aws/aws-sdk-go-v2 v1.9.1/go.mod h1:cK/D0BBs0b/oWPIcX/Z/obahJK1TT7IPVjy53i/mX/4=
 github.com/aws/aws-sdk-go-v2 v1.17.4/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
-github.com/aws/aws-sdk-go-v2 v1.18.0 h1:882kkTpSFhdgYRKVZ/VCgf7sd0ru57p2JCxz4/oN5RY=
 github.com/aws/aws-sdk-go-v2 v1.18.0/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
 github.com/aws/aws-sdk-go-v2 v1.21.0 h1:gMT0IW+03wtYJhRqTVYn0wLzwdnK9sRMcxmtfGzRdJc=
 github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M=
@@ -611,12 +610,10 @@ github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.51/go.mod h1:7Grl2gV+dx9SW
 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.67 h1:fI9/5BDEaAv/pv1VO1X1n3jfP9it+IGqWsCuuBQI8wM=
 github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.67/go.mod h1:zQClPRIwQZfJlZq6WZve+s4Tb4JW+3V6eS+4+KrYeP8=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28/go.mod h1:3lwChorpIM/BhImY/hy+Z6jekmN92cXGPI1QJasVPYY=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33 h1:kG5eQilShqmJbv11XL1VpyDbaEJzWxd4zRiCG30GSn4=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.33/go.mod h1:7i0PF1ME/2eUPFcjkVIwq+DOygHEoK92t5cDqNgYbIw=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g=
 github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22/go.mod h1:EqK7gVrIGAHyZItrD1D8B0ilgwMD1GiWAmbU4u/JHNk=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27 h1:vFQlirhuM8lLlpI7imKOMsjdQLuN9CPi+k44F/OFVsk=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.27/go.mod h1:UrHnn3QV/d0pBZ6QBAEQcqFLf8FAzLmoUfPVIueOvoM=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI=
 github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35/go.mod h1:SJC1nEVVva1g3pHAIdCp7QsRIkMmLAgoDquQ9Rr8kYw=
@@ -658,7 +655,6 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.18.3/go.mod h1:b+psTJn33Q4qGoDaM7ZiO
 github.com/aws/aws-sdk-go-v2/service/sts v1.19.0 h1:2DQLAKDteoEDI8zpCzqBMaZlJuoE9iTYD0gFmXVax9E=
 github.com/aws/aws-sdk-go-v2/service/sts v1.19.0/go.mod h1:BgQOMsg8av8jset59jelyPW7NoZcZXLVpDsXunGDrk8=
 github.com/aws/smithy-go v1.8.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E=
-github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
 github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
 github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
 github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
@@ -2006,7 +2002,6 @@ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
 github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
 github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
-github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
 github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
 github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
 github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
@@ -3291,9 +3286,7 @@ modernc.org/cc/v3 v3.40.0/go.mod h1:/bTg4dnWkSXowUO6ssQKnOV0yMVxDYNIsIrzqTFDGH0=
 modernc.org/ccgo/v3 v3.16.13 h1:Mkgdzl46i5F/CNR/Kj80Ri59hC8TKAhZrYSaqvkwzUw=
 modernc.org/ccgo/v3 v3.16.13/go.mod h1:2Quk+5YgpImhPjv2Qsob1DnZ/4som1lJTodubIcoUkY=
 modernc.org/ccorpus v1.11.6 h1:J16RXiiqiCgua6+ZvQot4yUuUy8zxgqbqEEUuGPlISk=
-modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ=
 modernc.org/httpfs v1.0.6 h1:AAgIpFZRXuYnkjftxTAZwMIiwEqAfk8aVB2/oA6nAeM=
-modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM=
 modernc.org/libc v1.22.6 h1:cbXU8R+A6aOjRuhsFh3nbDWXO/Hs4ClJRXYB11KmPDo=
 modernc.org/libc v1.22.6/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY=
 modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ=

From e04060636102d42c50a23ef269241aef71b9713f Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Mon, 11 Sep 2023 19:37:29 +0300
Subject: [PATCH 16/40] athena-driver review

---
 runtime/drivers/athena/athena.go                               | 2 +-
 .../src/features/sources/modal/submitRemoteSourceForm.ts       | 3 ++-
 web-common/src/features/sources/modal/yupSchemas.ts            | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index ea3a12a7789..0062c56dae9 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -54,7 +54,7 @@ var spec = drivers.Spec{
 		{
 			Key:         "region",
 			DisplayName: "AWS region",
-			Description: "AWS profile for credentials.",
+			Description: "AWS region",
 			Type:        drivers.StringPropertyType,
 			Required:    true,
 		},
diff --git a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
index 82a7f8f9d6b..31f22b6d946 100644
--- a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
+++ b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
@@ -63,7 +63,8 @@ export async function submitRemoteSourceForm(
   const formValues = Object.fromEntries(
     Object.entries(values).map(([key, value]) => {
       switch (key) {
-        case "project_id":
+        case "project_id": 
+        case "output_location":
           return [key, value];
         default:
           return [fromYupFriendlyKey(key), value];
diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts
index 3ef6d357edb..a67ae145466 100644
--- a/web-common/src/features/sources/modal/yupSchemas.ts
+++ b/web-common/src/features/sources/modal/yupSchemas.ts
@@ -80,7 +80,7 @@ export function getYupSchema(connector: V1ConnectorSpec) {
           )
           .required("Source name is required"),
         output_location: yup.string().required(),
-        region: yup.string(),
+        region: yup.string().required(),
       });
 
     default:

From 804f03e6a3796743fddd7d6a8c90f68f996f7106 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Mon, 11 Sep 2023 19:52:48 +0300
Subject: [PATCH 17/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 0062c56dae9..b36bf6685be 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -47,7 +47,7 @@ var spec = drivers.Spec{
 			Key:         "output_location",
 			DisplayName: "S3 output location",
 			Description: "Oputut location for query results in S3.",
-			Placeholder: "mybucket",
+			Placeholder: "s3://bucket-name/path/",
 			Type:        drivers.StringPropertyType,
 			Required:    true,
 		},
@@ -236,26 +236,31 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, err
 	}
 
-	prefix := "parquet_output_" + uuid.New().String()
-	bucketName := strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://")
-	unloadPath := bucketName + "/" + prefix
-	err = c.unload(ctx, cfg, conf, "s3://"+unloadPath)
+	unloadPath := "parquet_output_" + uuid.New().String()
+	bucketName := strings.Split(strings.TrimPrefix(conf.OutputLocation, "s3://"), "/")[0]
+	unloadLocation := strings.TrimRight(conf.OutputLocation, "/") + "/" + unloadPath
+	err = c.unload(ctx, cfg, conf, unloadLocation)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, prefix))
+		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, unloadPath))
 	}
 
 	bucketObj, err := c.openBucket(ctx, conf, bucketName)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", unloadPath, err), cleanPath(ctx, cfg, bucketName, prefix))
+		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", unloadLocation, err), cleanPath(ctx, cfg, bucketName, unloadPath))
 	}
 
 	opts := rillblob.Options{
-		GlobPattern: prefix + "/**",
+		GlobPattern: unloadPath + "/**",
 	}
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, prefix))
+		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, unloadPath))
+	}
+
+	err = cleanPath(ctx, cfg, bucketName, unloadPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to clean path: %w", err)
 	}
 
 	return it, nil
@@ -279,7 +284,7 @@ func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourcePro
 	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
 	client := athena.NewFromConfig(cfg)
 	resultConfig := &types.ResultConfiguration{
-		OutputLocation: aws.String("s3://" + strings.TrimPrefix(strings.TrimRight(conf.OutputLocation, "/"), "s3://") + "/output/"),
+		OutputLocation: aws.String(strings.TrimRight(conf.OutputLocation, "/") + "/output/"),
 	}
 
 	executeParams := &athena.StartQueryExecutionInput{

From 844877f016bf8d35bbeb7e62af3b689e42820489 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 12 Sep 2023 18:18:41 +0300
Subject: [PATCH 18/40] athena-driver review

---
 runtime/drivers/athena/athena.go              | 57 +++++++++++++------
 .../catalog/artifacts/yaml/objects.go         |  9 ++-
 .../sources/modal/submitRemoteSourceForm.ts   |  3 +-
 .../src/features/sources/modal/yupSchemas.ts  |  4 +-
 4 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index b36bf6685be..30ac103255c 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -44,7 +44,7 @@ var spec = drivers.Spec{
 			Placeholder: "select * from catalog.table;",
 		},
 		{
-			Key:         "output_location",
+			Key:         "athena_output_location",
 			DisplayName: "S3 output location",
 			Description: "Oputut location for query results in S3.",
 			Placeholder: "s3://bucket-name/path/",
@@ -52,11 +52,11 @@ var spec = drivers.Spec{
 			Required:    true,
 		},
 		{
-			Key:         "region",
-			DisplayName: "AWS region",
-			Description: "AWS region",
+			Key:         "athena_workgroup",
+			DisplayName: "AWS Athena workgroup",
+			Description: "AWS Athena workgroup to use for queries.",
 			Type:        drivers.StringPropertyType,
-			Required:    true,
+			Required:    false,
 		},
 	},
 	ConfigProperties: []drivers.PropertySchema{
@@ -77,6 +77,7 @@ type configProperties struct {
 	AccessKeyID     string `mapstructure:"aws_access_key_id"`
 	SecretAccessKey string `mapstructure:"aws_secret_access_key"`
 	SessionToken    string `mapstructure:"aws_access_token"`
+	AllowHostAccess bool   `mapstructure:"allow_host_access"`
 }
 
 func (d driver) Open(config map[string]any, shared bool, client activity.Client, logger *zap.Logger) (drivers.Handle, error) {
@@ -105,13 +106,13 @@ func (d driver) Spec() drivers.Spec {
 }
 
 func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) {
-	return false, fmt.Errorf("not implemented")
+	return false, nil
 }
 
 type sourceProperties struct {
 	SQL            string `mapstructure:"sql"`
-	OutputLocation string `mapstructure:"output_location"`
-	Region         string `mapstructure:"region"`
+	OutputLocation string `mapstructure:"athena_output_location"`
+	WorkGroup      string `mapstructure:"athena_workgroup"`
 }
 
 func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
@@ -227,11 +228,7 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	cfg, err := awsconfig.LoadDefaultConfig(
-		ctx,
-		awsconfig.WithRegion(conf.Region),
-		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
-	)
+	cfg, err := c.Cfg(ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -266,12 +263,28 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 	return it, nil
 }
 
+func (c *Connection) Cfg(ctx context.Context) (aws.Config, error) {
+	var cfg aws.Config
+	var err error
+	if c.config.AllowHostAccess {
+		cfg, err = awsconfig.LoadDefaultConfig(
+			ctx,
+		)
+	} else {
+		cfg, err = awsconfig.LoadDefaultConfig(
+			ctx,
+			awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
+		)
+	}
+	if err != nil {
+		return aws.Config{}, err
+	}
+
+	return cfg, nil
+}
+
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := awsconfig.LoadDefaultConfig(
-		ctx,
-		awsconfig.WithRegion(conf.Region),
-		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
-	)
+	cfg, err := c.Cfg(ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -283,6 +296,7 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourceProperties, path string) error {
 	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
 	client := athena.NewFromConfig(cfg)
+
 	resultConfig := &types.ResultConfiguration{
 		OutputLocation: aws.String(strings.TrimRight(conf.OutputLocation, "/") + "/output/"),
 	}
@@ -292,6 +306,13 @@ func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourcePro
 		ResultConfiguration: resultConfig,
 	}
 
+	if conf.WorkGroup != "" {
+		executeParams = &athena.StartQueryExecutionInput{
+			QueryString: aws.String(finalSQL),
+			WorkGroup:   aws.String(conf.WorkGroup),
+		}
+	}
+
 	athenaExecution, err := client.StartQueryExecution(ctx, executeParams)
 	if err != nil {
 		return err
diff --git a/runtime/services/catalog/artifacts/yaml/objects.go b/runtime/services/catalog/artifacts/yaml/objects.go
index 271d9541d19..d74fbfc79c2 100644
--- a/runtime/services/catalog/artifacts/yaml/objects.go
+++ b/runtime/services/catalog/artifacts/yaml/objects.go
@@ -45,7 +45,8 @@ type Source struct {
 	SQL                   string         `yaml:"sql,omitempty" mapstructure:"sql,omitempty"`
 	DB                    string         `yaml:"db,omitempty" mapstructure:"db,omitempty"`
 	ProjectID             string         `yaml:"project_id,omitempty" mapstructure:"project_id,omitempty"`
-	AthenaOutputLocation  string         `yaml:"output_location,omitempty" mapstructure:"output_location,omitempty"`
+	AthenaOutputLocation  string         `yaml:"athena_output_location,omitempty" mapstructure:"athena_output_location,omitempty"`
+	AthenaWorkgroup       string         `yaml:"athena_workgroup,omitempty" mapstructure:"athena_workgroup,omitempty"`
 }
 
 type ExtractPolicy struct {
@@ -241,7 +242,11 @@ func fromSourceArtifact(source *Source, path string) (*drivers.CatalogEntry, err
 	}
 
 	if source.AthenaOutputLocation != "" {
-		props["output_location"] = source.AthenaOutputLocation
+		props["athena_output_location"] = source.AthenaOutputLocation
+	}
+
+	if source.AthenaWorkgroup != "" {
+		props["athena_workgroup"] = source.AthenaWorkgroup
 	}
 
 	propsPB, err := structpb.NewStruct(props)
diff --git a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
index 31f22b6d946..1eb5872b2eb 100644
--- a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
+++ b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
@@ -64,7 +64,8 @@ export async function submitRemoteSourceForm(
     Object.entries(values).map(([key, value]) => {
       switch (key) {
         case "project_id": 
-        case "output_location":
+        case "athena_output_location":
+        case "athena_workgroup":
           return [key, value];
         default:
           return [fromYupFriendlyKey(key), value];
diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts
index a67ae145466..9474266edba 100644
--- a/web-common/src/features/sources/modal/yupSchemas.ts
+++ b/web-common/src/features/sources/modal/yupSchemas.ts
@@ -79,8 +79,8 @@ export function getYupSchema(connector: V1ConnectorSpec) {
             "Source name must start with a letter or underscore and contain only letters, numbers, and underscores"
           )
           .required("Source name is required"),
-        output_location: yup.string().required(),
-        region: yup.string().required(),
+        athena_output_location: yup.string().required(),
+        athena_workgroup: yup.string(),
       });
 
     default:

From f0bbee98aa5c85d5f8f71b89cd5f4050ad5d87c1 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Tue, 12 Sep 2023 20:44:29 +0300
Subject: [PATCH 19/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 41 ++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 30ac103255c..59059dd8a8c 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -222,13 +222,30 @@ func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) e
 	return err
 }
 
+type janitorIterator struct {
+	drivers.FileIterator
+	ctx        context.Context
+	cfg        aws.Config
+	bucketName string
+	unloadPath string
+}
+
+func (ci janitorIterator) Close() error {
+	err := ci.FileIterator.Close()
+	if err != nil {
+		return err
+	}
+
+	return cleanPath(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
+}
+
 func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSource) (drivers.FileIterator, error) {
 	conf, err := parseSourceProperties(source.Properties)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	cfg, err := c.Cfg(ctx)
+	cfg, err := c.newCfg(ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -255,15 +272,16 @@ func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSo
 		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, unloadPath))
 	}
 
-	err = cleanPath(ctx, cfg, bucketName, unloadPath)
-	if err != nil {
-		return nil, fmt.Errorf("failed to clean path: %w", err)
-	}
-
-	return it, nil
+	return janitorIterator{
+		FileIterator: it,
+		ctx:          ctx,
+		unloadPath:   unloadPath,
+		bucketName:   bucketName,
+		cfg:          cfg,
+	}, nil
 }
 
-func (c *Connection) Cfg(ctx context.Context) (aws.Config, error) {
+func (c *Connection) newCfg(ctx context.Context) (aws.Config, error) {
 	var cfg aws.Config
 	var err error
 	if c.config.AllowHostAccess {
@@ -284,7 +302,7 @@ func (c *Connection) Cfg(ctx context.Context) (aws.Config, error) {
 }
 
 func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := c.Cfg(ctx)
+	cfg, err := c.newCfg(ctx)
 	if err != nil {
 		return nil, err
 	}
@@ -318,8 +336,7 @@ func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourcePro
 		return err
 	}
 
-	r := retrier.New(retrier.ConstantBackoff(20, 1*time.Second), nil)
-
+	r := retrier.New(retrier.ConstantBackoff(int(5*time.Minute/time.Second), time.Second), nil) // 5 minutes timeout
 	return r.RunCtx(ctx, func(ctx context.Context) error {
 		status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
 			QueryExecutionId: athenaExecution.QueryExecutionId,
@@ -335,6 +352,6 @@ func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourcePro
 		} else if state == types.QueryExecutionStateFailed {
 			return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
 		}
-		return fmt.Errorf("Execution is not completed yet, current state: %s", state)
+		return fmt.Errorf("Athena ingestion timeout")
 	})
 }

From 03d5c42b9fecd6a9f9c5bfc74e4dc387fd8abe4d Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 13 Sep 2023 17:29:23 +0300
Subject: [PATCH 20/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 .../transporter/objectStore_to_duckDB.go      |  4 +--
 runtime/drivers/duckdb/transporter/utils.go   | 30 +++++++++++++++++++
 .../sources/modal/AddSourceModal.svelte       |  1 +
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index cf28daac58e..e6b04a03b95 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -51,10 +51,10 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 	}
 
 	fromAthena := reflect.TypeOf(t.from).AssignableTo(reflect.TypeOf(&athena.Connection{}))
-	sql, hasSQL := src.Properties["sql"].(string)
+	sql, hasSQL := srcProps["sql"].(string)
 	// if sql is specified use ast rewrite to fill in the downloaded files
 	if hasSQL && !fromAthena {
-		return t.ingestDuckDBSQL(ctx, sql, iterator, dbSink, opts, p)
+		return t.ingestDuckDBSQL(ctx, sql, iterator, sinkCfg, opts, p)
 	}
 
 	p.Target(size, drivers.ProgressUnitByte)
diff --git a/runtime/drivers/duckdb/transporter/utils.go b/runtime/drivers/duckdb/transporter/utils.go
index ee609e148ec..f23b26fbe3d 100644
--- a/runtime/drivers/duckdb/transporter/utils.go
+++ b/runtime/drivers/duckdb/transporter/utils.go
@@ -5,8 +5,38 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+
+	"github.com/mitchellh/mapstructure"
 )
 
+type sourceProperties struct {
+	Database string `mapstructure:"db"`
+	SQL      string `mapstructure:"sql"`
+}
+
+func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
+	cfg := &sourceProperties{}
+	if err := mapstructure.Decode(props, cfg); err != nil {
+		return nil, fmt.Errorf("failed to parse source properties: %w", err)
+	}
+	if cfg.SQL == "" {
+		return nil, fmt.Errorf("property 'sql' is mandatory")
+	}
+	return cfg, nil
+}
+
+type sinkProperties struct {
+	Table string `mapstructure:"table"`
+}
+
+func parseSinkProperties(props map[string]any) (*sinkProperties, error) {
+	cfg := &sinkProperties{}
+	if err := mapstructure.Decode(props, cfg); err != nil {
+		return nil, fmt.Errorf("failed to parse sink properties: %w", err)
+	}
+	return cfg, nil
+}
+
 func sourceReader(paths []string, format string, ingestionProps map[string]any, fromAthena bool) (string, error) {
 	// Generate a "read" statement
 	if containsAny(format, []string{".csv", ".tsv", ".txt"}) {
diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte
index 2d566b9118d..9be5042f5eb 100644
--- a/web-common/src/features/sources/modal/AddSourceModal.svelte
+++ b/web-common/src/features/sources/modal/AddSourceModal.svelte
@@ -28,6 +28,7 @@
     "local_file",
     "motherduck",
     "bigquery",
+    "athena",
   ];
 
   const connectors = createRuntimeServiceListConnectors({

From a5a5146ab72daa0bddb243b127cbf252d5f06826 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 13 Sep 2023 17:37:13 +0300
Subject: [PATCH 21/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 runtime/drivers/athena/athena.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 59059dd8a8c..184ab6335a2 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -105,7 +105,7 @@ func (d driver) Spec() drivers.Spec {
 	return spec
 }
 
-func (d driver) HasAnonymousSourceAccess(ctx context.Context, src drivers.Source, logger *zap.Logger) (bool, error) {
+func (d driver) HasAnonymousSourceAccess(ctx context.Context, src map[string]any, logger *zap.Logger) (bool, error) {
 	return false, nil
 }
 
@@ -239,8 +239,8 @@ func (ci janitorIterator) Close() error {
 	return cleanPath(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
 }
 
-func (c *Connection) DownloadFiles(ctx context.Context, source *drivers.BucketSource) (drivers.FileIterator, error) {
-	conf, err := parseSourceProperties(source.Properties)
+func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (drivers.FileIterator, error) {
+	conf, err := parseSourceProperties(props)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}

From b7fc59d1d1c5b2986eb884ed586418ffbbef2fd2 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 13 Sep 2023 21:34:15 +0300
Subject: [PATCH 22/40] athena-driver review

---
 runtime/drivers/athena/athena.go              | 70 +++++--------------
 .../transporter/objectStore_to_duckDB.go      | 16 ++---
 .../duckdb/transporter/sqlstore_to_duckDB.go  |  5 +-
 3 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 184ab6335a2..38bc998b817 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -2,7 +2,6 @@ package athena
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"strings"
 	"time"
@@ -15,10 +14,8 @@ import (
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
 	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
 	"github.com/eapache/go-resiliency/retrier"
-	"github.com/google/uuid"
 	"github.com/mitchellh/mapstructure"
 	"github.com/rilldata/rill/runtime/drivers"
-	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
 	"github.com/rilldata/rill/runtime/pkg/activity"
 	"go.uber.org/zap"
 	"gocloud.dev/blob"
@@ -181,7 +178,7 @@ func (c *Connection) MigrationStatus(ctx context.Context) (current, desired int,
 
 // AsObjectStore implements drivers.Connection.
 func (c *Connection) AsObjectStore() (drivers.ObjectStore, bool) {
-	return c, true
+	return nil, false
 }
 
 // AsTransporter implements drivers.Connection.
@@ -195,7 +192,7 @@ func (c *Connection) AsFileStore() (drivers.FileStore, bool) {
 
 // AsSQLStore implements drivers.Connection.
 func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) {
-	return nil, false
+	return c, true
 }
 
 func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) error {
@@ -239,48 +236,6 @@ func (ci janitorIterator) Close() error {
 	return cleanPath(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
 }
 
-func (c *Connection) DownloadFiles(ctx context.Context, props map[string]any) (drivers.FileIterator, error) {
-	conf, err := parseSourceProperties(props)
-	if err != nil {
-		return nil, fmt.Errorf("failed to parse config: %w", err)
-	}
-
-	cfg, err := c.newCfg(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	unloadPath := "parquet_output_" + uuid.New().String()
-	bucketName := strings.Split(strings.TrimPrefix(conf.OutputLocation, "s3://"), "/")[0]
-	unloadLocation := strings.TrimRight(conf.OutputLocation, "/") + "/" + unloadPath
-	err = c.unload(ctx, cfg, conf, unloadLocation)
-	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, unloadPath))
-	}
-
-	bucketObj, err := c.openBucket(ctx, conf, bucketName)
-	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", unloadLocation, err), cleanPath(ctx, cfg, bucketName, unloadPath))
-	}
-
-	opts := rillblob.Options{
-		GlobPattern: unloadPath + "/**",
-	}
-
-	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
-	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, unloadPath))
-	}
-
-	return janitorIterator{
-		FileIterator: it,
-		ctx:          ctx,
-		unloadPath:   unloadPath,
-		bucketName:   bucketName,
-		cfg:          cfg,
-	}, nil
-}
-
 func (c *Connection) newCfg(ctx context.Context) (aws.Config, error) {
 	var cfg aws.Config
 	var err error
@@ -311,12 +266,25 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
 }
 
-func (c *Connection) unload(ctx context.Context, cfg aws.Config, conf *sourceProperties, path string) error {
-	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, path)
-	client := athena.NewFromConfig(cfg)
+func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sourceProperties) (string, error) {
+	if conf.OutputLocation != "" {
+		return conf.OutputLocation, nil
+	} else {
+		wo, err := client.GetWorkGroup(ctx, &athena.GetWorkGroupInput{
+			WorkGroup: aws.String(conf.WorkGroup),
+		})
+		if err != nil {
+			return "", err
+		}
+		return *wo.WorkGroup.Configuration.ResultConfiguration.OutputLocation, nil
+	}
+}
+
+func (c *Connection) unload(client *athena.Client, ctx context.Context, cfg aws.Config, conf *sourceProperties, unloadLocation string) error {
+	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
 
 	resultConfig := &types.ResultConfiguration{
-		OutputLocation: aws.String(strings.TrimRight(conf.OutputLocation, "/") + "/output/"),
+		OutputLocation: aws.String(conf.OutputLocation),
 	}
 
 	executeParams := &athena.StartQueryExecutionInput{
diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index e6b04a03b95..28a68c13d95 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -4,13 +4,11 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"reflect"
 	"strings"
 	"time"
 
 	"github.com/c2h5oh/datasize"
 	"github.com/rilldata/rill/runtime/drivers"
-	"github.com/rilldata/rill/runtime/drivers/athena"
 	"github.com/rilldata/rill/runtime/pkg/duckdbsql"
 	"github.com/rilldata/rill/runtime/pkg/fileutil"
 	"github.com/rilldata/rill/runtime/pkg/observability"
@@ -50,10 +48,9 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 		return drivers.ErrIngestionLimitExceeded
 	}
 
-	fromAthena := reflect.TypeOf(t.from).AssignableTo(reflect.TypeOf(&athena.Connection{}))
 	sql, hasSQL := srcProps["sql"].(string)
 	// if sql is specified use ast rewrite to fill in the downloaded files
-	if hasSQL && !fromAthena {
+	if hasSQL {
 		return t.ingestDuckDBSQL(ctx, sql, iterator, sinkCfg, opts, p)
 	}
 
@@ -63,9 +60,6 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 	val, formatDefined := srcProps["format"].(string)
 	if formatDefined {
 		format = fmt.Sprintf(".%s", val)
-	} else if fromAthena {
-		format = "parquet"
-		formatDefined = true
 	}
 
 	allowSchemaRelaxation, err := schemaRelaxationProperty(srcProps)
@@ -108,13 +102,13 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 		st := time.Now()
 		t.logger.Info("ingesting files", zap.Strings("files", files), observability.ZapCtx(ctx))
 		if appendToTable {
-			if err := a.appendData(ctx, files, format, fromAthena); err != nil {
+			if err := a.appendData(ctx, files, format); err != nil {
 				return err
 			}
 		} else {
 			var from string
 			var err error
-			from, err = sourceReader(files, format, ingestionProps, fromAthena)
+			from, err = sourceReader(files, format, ingestionProps, false)
 			if err != nil {
 				return err
 			}
@@ -155,8 +149,8 @@ func newAppender(to drivers.OLAPStore, sink *sinkProperties, ingestionProps map[
 	}
 }
 
-func (a *appender) appendData(ctx context.Context, files []string, format string, fromAthena bool) error {
-	from, err := sourceReader(files, format, a.ingestionProps, fromAthena)
+func (a *appender) appendData(ctx context.Context, files []string, format string) error {
+	from, err := sourceReader(files, format, a.ingestionProps, false)
 	if err != nil {
 		return err
 	}
diff --git a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
index 4b5fc99356d..111be0bc720 100644
--- a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
@@ -3,9 +3,11 @@ package transporter
 import (
 	"context"
 	"fmt"
+	"reflect"
 	"time"
 
 	"github.com/rilldata/rill/runtime/drivers"
+	"github.com/rilldata/rill/runtime/drivers/athena"
 	"github.com/rilldata/rill/runtime/pkg/fileutil"
 	"github.com/rilldata/rill/runtime/pkg/observability"
 	"go.uber.org/zap"
@@ -51,6 +53,7 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map
 	// TODO :: iteration over fileiterator is similar(apart from no schema changes possible here)
 	// to consuming fileIterator in objectStore_to_duckDB
 	// both can be refactored to follow same path
+	fromAthena := reflect.TypeOf(s.from).AssignableTo(reflect.TypeOf(&athena.Connection{}))
 	for iter.HasNext() {
 		files, err := iter.NextBatch(opts.IteratorBatch)
 		if err != nil {
@@ -58,7 +61,7 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map
 		}
 
 		format := fileutil.FullExt(files[0])
-		from, err := sourceReader(files, format, make(map[string]any), false)
+		from, err := sourceReader(files, format, make(map[string]any), fromAthena)
 		if err != nil {
 			return err
 		}

From 28606aa0952cad80ec5c2d18665ba49aa3676557 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 13 Sep 2023 21:35:50 +0300
Subject: [PATCH 23/40] athena-driver review

---
 runtime/drivers/athena/athena.go    |  1 -
 runtime/drivers/athena/sql_store.go | 69 +++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 runtime/drivers/athena/sql_store.go

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 38bc998b817..d1fa0a5b726 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -3,7 +3,6 @@ package athena
 import (
 	"context"
 	"fmt"
-	"strings"
 	"time"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
new file mode 100644
index 00000000000..da6867aeac0
--- /dev/null
+++ b/runtime/drivers/athena/sql_store.go
@@ -0,0 +1,69 @@
+package athena
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/aws/aws-sdk-go-v2/service/athena"
+	"github.com/google/uuid"
+	"github.com/rilldata/rill/runtime/drivers"
+	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
+)
+
+func (c *Connection) Query(ctx context.Context, props map[string]any) (drivers.RowIterator, error) {
+	return nil, fmt.Errorf("not implemented")
+}
+
+func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, opt *drivers.QueryOption, p drivers.Progress) (drivers.FileIterator, error) {
+	conf, err := parseSourceProperties(props)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse config: %w", err)
+	}
+
+	cfg, err := c.newCfg(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	client := athena.NewFromConfig(cfg)
+	outputLocation, err := resolveOutputLocation(ctx, client, conf)
+	if err != nil {
+		return nil, err
+	}
+
+	// outputLocation s3://bucket-name/prefix
+	// unloadLocation s3://bucket-name/prefix/rill-connector-parquet-output-<uuid>
+	// unloadPath prefix/rill-connector-parquet-output-<uuid>
+	unloadFolderName := "parquet_output_" + uuid.New().String()
+	bucketName := strings.Split(strings.TrimPrefix(outputLocation, "s3://"), "/")[0]
+	unloadLocation := strings.TrimRight(outputLocation, "/") + "/" + unloadFolderName
+	unloadPath := strings.TrimPrefix(strings.TrimPrefix(unloadLocation, "s3://"+bucketName), "/")
+	err = c.unload(client, ctx, cfg, conf, strings.TrimRight(outputLocation, "/")+unloadFolderName)
+	if err != nil {
+		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, unloadPath))
+	}
+
+	bucketObj, err := c.openBucket(ctx, conf, bucketName)
+	if err != nil {
+		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanPath(ctx, cfg, bucketName, unloadPath))
+	}
+
+	opts := rillblob.Options{
+		GlobPattern: unloadPath + "/**",
+	}
+
+	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
+	if err != nil {
+		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, unloadPath))
+	}
+
+	return janitorIterator{
+		FileIterator: it,
+		ctx:          ctx,
+		unloadPath:   unloadPath,
+		bucketName:   bucketName,
+		cfg:          cfg,
+	}, nil
+}

From c6af926dbceb6f90faf5e9287f4b5be8af96d5a2 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Wed, 13 Sep 2023 21:58:24 +0300
Subject: [PATCH 24/40] athena-driver review

---
 runtime/drivers/athena/athena.go    | 12 ++++++++++--
 runtime/drivers/athena/sql_store.go |  3 ++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index d1fa0a5b726..88fa431f7ab 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -2,6 +2,7 @@ package athena
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"time"
 
@@ -204,6 +205,10 @@ func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) e
 		return err
 	}
 
+	if len(out.Contents) > 1000 { // aws error is opaque here
+		return fmt.Errorf("too many objects to delete %d from %s", len(out.Contents), "s3://"+bucketName+"/"+prefix)
+	}
+
 	ids := make([]s3v2types.ObjectIdentifier, 0, len(out.Contents))
 	for _, o := range out.Contents {
 		ids = append(ids, s3v2types.ObjectIdentifier{
@@ -211,6 +216,7 @@ func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) e
 		})
 	}
 	_, err = s3client.DeleteObjects(ctx, &s3v2.DeleteObjectsInput{
+		Bucket: &bucketName,
 		Delete: &s3v2types.Delete{
 			Objects: ids,
 		},
@@ -268,7 +274,7 @@ func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, buc
 func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sourceProperties) (string, error) {
 	if conf.OutputLocation != "" {
 		return conf.OutputLocation, nil
-	} else {
+	} else if conf.WorkGroup != "" {
 		wo, err := client.GetWorkGroup(ctx, &athena.GetWorkGroupInput{
 			WorkGroup: aws.String(conf.WorkGroup),
 		})
@@ -277,9 +283,11 @@ func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sou
 		}
 		return *wo.WorkGroup.Configuration.ResultConfiguration.OutputLocation, nil
 	}
+
+	return "", errors.New("Athena output location or Athena workgroup is required")
 }
 
-func (c *Connection) unload(client *athena.Client, ctx context.Context, cfg aws.Config, conf *sourceProperties, unloadLocation string) error {
+func (c *Connection) unload(ctx context.Context, client *athena.Client, cfg aws.Config, conf *sourceProperties, unloadLocation string) error {
 	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
 
 	resultConfig := &types.ResultConfiguration{
diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index da6867aeac0..237dda9dc6e 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -33,6 +33,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, opt
 		return nil, err
 	}
 
+	// ie
 	// outputLocation s3://bucket-name/prefix
 	// unloadLocation s3://bucket-name/prefix/rill-connector-parquet-output-<uuid>
 	// unloadPath prefix/rill-connector-parquet-output-<uuid>
@@ -40,7 +41,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, opt
 	bucketName := strings.Split(strings.TrimPrefix(outputLocation, "s3://"), "/")[0]
 	unloadLocation := strings.TrimRight(outputLocation, "/") + "/" + unloadFolderName
 	unloadPath := strings.TrimPrefix(strings.TrimPrefix(unloadLocation, "s3://"+bucketName), "/")
-	err = c.unload(client, ctx, cfg, conf, strings.TrimRight(outputLocation, "/")+unloadFolderName)
+	err = c.unload(ctx, client, cfg, conf, unloadLocation)
 	if err != nil {
 		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, unloadPath))
 	}

From 6d0476511e463b9d62d3324ceaff60808d58cb01 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Thu, 14 Sep 2023 14:14:04 +0300
Subject: [PATCH 25/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 .../transporter/objectStore_to_duckDB.go      |  1 -
 runtime/drivers/duckdb/transporter/utils.go   | 76 -------------------
 2 files changed, 77 deletions(-)

diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index 3530360d292..031e1b60e95 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -52,7 +52,6 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 		return drivers.ErrIngestionLimitExceeded
 	}
 
-	sql, hasSQL := srcProps["sql"].(string)
 	// if sql is specified use ast rewrite to fill in the downloaded files
 	if srcCfg.SQL != "" {
 		return t.ingestDuckDBSQL(ctx, srcCfg.SQL, iterator, sinkCfg, opts, p)
diff --git a/runtime/drivers/duckdb/transporter/utils.go b/runtime/drivers/duckdb/transporter/utils.go
index e148ea94528..c7b56fdd1dc 100644
--- a/runtime/drivers/duckdb/transporter/utils.go
+++ b/runtime/drivers/duckdb/transporter/utils.go
@@ -98,82 +98,6 @@ func parseFileSourceProperties(props map[string]any) (*fileSourceProperties, err
 	return cfg, nil
 }
 
-type dbSourceProperties struct {
-	Database string `mapstructure:"db"`
-	SQL      string `mapstructure:"sql"`
-}
-
-func parseDBSourceProperties(props map[string]any) (*dbSourceProperties, error) {
-	cfg := &dbSourceProperties{}
-	if err := mapstructure.Decode(props, cfg); err != nil {
-		return nil, fmt.Errorf("failed to parse source properties: %w", err)
-	}
-	if cfg.SQL == "" {
-		return nil, fmt.Errorf("property 'sql' is mandatory")
-	}
-	return cfg, nil
-}
-
-type fileSourceProperties struct {
-	SQL                   string         `mapstructure:"sql"`
-	DuckDB                map[string]any `mapstructure:"duckdb"`
-	Format                string         `mapstructure:"format"`
-	AllowSchemaRelaxation bool           `mapstructure:"allow_schema_relaxation"`
-	BatchSize             string         `mapstructure:"batch_size"`
-	BatchSizeBytes        int64          `mapstructure:"-"` // Inferred from BatchSize
-
-	// Backwards compatibility
-	HivePartitioning            *bool  `mapstructure:"hive_partitioning"`
-	CSVDelimiter                string `mapstructure:"csv.delimiter"`
-	IngestAllowSchemaRelaxation *bool  `mapstructure:"ingest.allow_schema_relaxation"`
-}
-
-func parseFileSourceProperties(props map[string]any) (*fileSourceProperties, error) {
-	cfg := &fileSourceProperties{}
-	if err := mapstructure.Decode(props, cfg); err != nil {
-		return nil, fmt.Errorf("failed to parse source properties: %w", err)
-	}
-
-	if cfg.DuckDB == nil {
-		cfg.DuckDB = map[string]any{}
-	}
-
-	if cfg.HivePartitioning != nil {
-		cfg.DuckDB["hive_partitioning"] = *cfg.HivePartitioning
-		cfg.HivePartitioning = nil
-	}
-
-	if cfg.CSVDelimiter != "" {
-		cfg.DuckDB["delim"] = fmt.Sprintf("'%v'", cfg.CSVDelimiter)
-		cfg.CSVDelimiter = ""
-	}
-
-	if cfg.IngestAllowSchemaRelaxation != nil {
-		cfg.AllowSchemaRelaxation = *cfg.IngestAllowSchemaRelaxation
-		cfg.IngestAllowSchemaRelaxation = nil
-	}
-
-	if cfg.AllowSchemaRelaxation {
-		if val, ok := cfg.DuckDB["union_by_name"].(bool); ok && !val {
-			return nil, fmt.Errorf("can't set `union_by_name` and `allow_schema_relaxation` at the same time")
-		}
-
-		if hasKey(cfg.DuckDB, "columns", "types", "dtypes") {
-			return nil, fmt.Errorf("if any of `columns`,`types`,`dtypes` is set `allow_schema_relaxation` must be disabled")
-		}
-	}
-
-	if cfg.BatchSize != "" {
-		b, err := datasize.ParseString(cfg.BatchSize)
-		if err != nil {
-			return nil, err
-		}
-		cfg.BatchSizeBytes = int64(b.Bytes())
-	}
-
-	return cfg, nil
-}
-
 func sourceReader(paths []string, format string, ingestionProps map[string]any, fromAthena bool) (string, error) {
 	// Generate a "read" statement
 	if containsAny(format, []string{".csv", ".tsv", ".txt"}) {

From 45efd9a02d88e6dd6b44d86f61cb8166febaf905 Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Thu, 14 Sep 2023 14:32:02 +0300
Subject: [PATCH 26/40] Merge remote-tracking branch 'origin/main' into
 athena-connector

---
 web-common/src/features/sources/modal/submitRemoteSourceForm.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
index 1eb5872b2eb..148e8f66596 100644
--- a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
+++ b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
@@ -63,7 +63,7 @@ export async function submitRemoteSourceForm(
   const formValues = Object.fromEntries(
     Object.entries(values).map(([key, value]) => {
       switch (key) {
-        case "project_id": 
+        case "project_id":
         case "athena_output_location":
         case "athena_workgroup":
           return [key, value];

From b594ed4e8a2f8d16cc917882e97a1f19609d642d Mon Sep 17 00:00:00 2001
From: Egor Ryashin <egor.ryashin@rilldata.com>
Date: Thu, 21 Sep 2023 13:01:44 +0300
Subject: [PATCH 27/40] athena-driver review

---
 runtime/drivers/athena/athena.go | 41 ++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 88fa431f7ab..c408ccca690 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -13,7 +13,6 @@ import (
 	"github.com/aws/aws-sdk-go-v2/service/athena/types"
 	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
 	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
-	"github.com/eapache/go-resiliency/retrier"
 	"github.com/mitchellh/mapstructure"
 	"github.com/rilldata/rill/runtime/drivers"
 	"github.com/rilldata/rill/runtime/pkg/activity"
@@ -311,22 +310,28 @@ func (c *Connection) unload(ctx context.Context, client *athena.Client, cfg aws.
 		return err
 	}
 
-	r := retrier.New(retrier.ConstantBackoff(int(5*time.Minute/time.Second), time.Second), nil) // 5 minutes timeout
-	return r.RunCtx(ctx, func(ctx context.Context) error {
-		status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
-			QueryExecutionId: athenaExecution.QueryExecutionId,
-		})
-		if err != nil {
-			return err
+	tm := time.NewTimer(5 * time.Minute)
+	defer tm.Stop()
+	for {
+		select {
+		case <-tm.C:
+			fmt.Errorf("Athena ingestion timeout")
+		default:
+			status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
+				QueryExecutionId: athenaExecution.QueryExecutionId,
+			})
+			if err != nil {
+				return err
+			}
+
+			state := status.QueryExecution.Status.State
+
+			if state == types.QueryExecutionStateSucceeded || state == types.QueryExecutionStateCancelled {
+				return nil
+			} else if state == types.QueryExecutionStateFailed {
+				return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
+			}
 		}
-
-		state := status.QueryExecution.Status.State
-
-		if state == types.QueryExecutionStateSucceeded || state == types.QueryExecutionStateCancelled {
-			return nil
-		} else if state == types.QueryExecutionStateFailed {
-			return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
-		}
-		return fmt.Errorf("Athena ingestion timeout")
-	})
+		time.Sleep(time.Second)
+	}
 }

From bf2f0449884e1cb834445b18bcd8c13ba4bdaa27 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Sat, 23 Sep 2023 11:10:37 +0300
Subject: [PATCH 28/40] Auto-determine AWS region; cleanUp function; added AWS
 region and reordered functions; moved functions to sql_store; renaming and
 code refactoring

---
 runtime/connections.go                        |   2 +-
 runtime/drivers/athena/athena.go              | 196 ++----------
 runtime/drivers/athena/sql_store.go           | 285 ++++++++++++++++--
 .../duckdb/transporter/filestore_to_duckDB.go |   2 +-
 .../transporter/objectStore_to_duckDB.go      |   4 +-
 .../duckdb/transporter/sqlstore_to_duckDB.go  |   8 +-
 runtime/drivers/duckdb/transporter/utils.go   |   4 +-
 .../catalog/artifacts/yaml/objects.go         |   8 +-
 .../catalog/migrator/sources/sources.go       |   6 +-
 .../sources/modal/submitRemoteSourceForm.ts   |   4 +-
 .../src/features/sources/modal/yupSchemas.ts  |   4 +-
 11 files changed, 305 insertions(+), 218 deletions(-)

diff --git a/runtime/connections.go b/runtime/connections.go
index 4ca6ff5344a..9710a9fc48b 100644
--- a/runtime/connections.go
+++ b/runtime/connections.go
@@ -227,7 +227,7 @@ func (r *Runtime) connectorConfig(ctx context.Context, instanceID, name string)
 	// For backwards compatibility, certain root-level variables apply to certain implicit connectors.
 	// NOTE: This switches on connector.Name, not connector.Type, because this only applies to implicit connectors.
 	switch connector.Name {
-	case "s3":
+	case "s3", "athena":
 		setIfNil(cfg, "aws_access_key_id", vars["aws_access_key_id"])
 		setIfNil(cfg, "aws_secret_access_key", vars["aws_secret_access_key"])
 		setIfNil(cfg, "aws_session_token", vars["aws_session_token"])
diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index c408ccca690..6aaeb92c108 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -2,23 +2,12 @@ package athena
 
 import (
 	"context"
-	"errors"
 	"fmt"
-	"time"
 
-	"github.com/aws/aws-sdk-go-v2/aws"
-	awsconfig "github.com/aws/aws-sdk-go-v2/config"
-	"github.com/aws/aws-sdk-go-v2/credentials"
-	"github.com/aws/aws-sdk-go-v2/service/athena"
-	"github.com/aws/aws-sdk-go-v2/service/athena/types"
-	s3v2 "github.com/aws/aws-sdk-go-v2/service/s3"
-	s3v2types "github.com/aws/aws-sdk-go-v2/service/s3/types"
 	"github.com/mitchellh/mapstructure"
 	"github.com/rilldata/rill/runtime/drivers"
 	"github.com/rilldata/rill/runtime/pkg/activity"
 	"go.uber.org/zap"
-	"gocloud.dev/blob"
-	"gocloud.dev/blob/s3blob"
 )
 
 func init() {
@@ -40,20 +29,27 @@ var spec = drivers.Spec{
 			Placeholder: "select * from catalog.table;",
 		},
 		{
-			Key:         "athena_output_location",
+			Key:         "output_location",
 			DisplayName: "S3 output location",
-			Description: "Oputut location for query results in S3.",
+			Description: "Output location for query results in S3.",
 			Placeholder: "s3://bucket-name/path/",
 			Type:        drivers.StringPropertyType,
-			Required:    true,
+			Required:    false,
 		},
 		{
-			Key:         "athena_workgroup",
+			Key:         "workgroup",
 			DisplayName: "AWS Athena workgroup",
 			Description: "AWS Athena workgroup to use for queries.",
 			Type:        drivers.StringPropertyType,
 			Required:    false,
 		},
+		{
+			Key:         "region",
+			DisplayName: "AWS region",
+			Description: "AWS region to connect to Athena and the output location.",
+			Type:        drivers.StringPropertyType,
+			Required:    false,
+		},
 	},
 	ConfigProperties: []drivers.PropertySchema{
 		{
@@ -69,14 +65,7 @@ var spec = drivers.Spec{
 
 type driver struct{}
 
-type configProperties struct {
-	AccessKeyID     string `mapstructure:"aws_access_key_id"`
-	SecretAccessKey string `mapstructure:"aws_secret_access_key"`
-	SessionToken    string `mapstructure:"aws_access_token"`
-	AllowHostAccess bool   `mapstructure:"allow_host_access"`
-}
-
-func (d driver) Open(config map[string]any, shared bool, client activity.Client, logger *zap.Logger) (drivers.Handle, error) {
+func (d driver) Open(config map[string]any, shared bool, _ activity.Client, logger *zap.Logger) (drivers.Handle, error) {
 	if shared {
 		return nil, fmt.Errorf("athena driver can't be shared")
 	}
@@ -105,22 +94,6 @@ func (d driver) HasAnonymousSourceAccess(ctx context.Context, src map[string]any
 	return false, nil
 }
 
-type sourceProperties struct {
-	SQL            string `mapstructure:"sql"`
-	OutputLocation string `mapstructure:"athena_output_location"`
-	WorkGroup      string `mapstructure:"athena_workgroup"`
-}
-
-func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
-	conf := &sourceProperties{}
-	err := mapstructure.Decode(props, conf)
-	if err != nil {
-		return nil, err
-	}
-
-	return conf, nil
-}
-
 type Connection struct {
 	config *configProperties
 	logger *zap.Logger
@@ -194,144 +167,9 @@ func (c *Connection) AsSQLStore() (drivers.SQLStore, bool) {
 	return c, true
 }
 
-func cleanPath(ctx context.Context, cfg aws.Config, bucketName, prefix string) error {
-	s3client := s3v2.NewFromConfig(cfg)
-	out, err := s3client.ListObjectsV2(ctx, &s3v2.ListObjectsV2Input{
-		Bucket: &bucketName,
-		Prefix: &prefix,
-	})
-	if err != nil {
-		return err
-	}
-
-	if len(out.Contents) > 1000 { // aws error is opaque here
-		return fmt.Errorf("too many objects to delete %d from %s", len(out.Contents), "s3://"+bucketName+"/"+prefix)
-	}
-
-	ids := make([]s3v2types.ObjectIdentifier, 0, len(out.Contents))
-	for _, o := range out.Contents {
-		ids = append(ids, s3v2types.ObjectIdentifier{
-			Key: o.Key,
-		})
-	}
-	_, err = s3client.DeleteObjects(ctx, &s3v2.DeleteObjectsInput{
-		Bucket: &bucketName,
-		Delete: &s3v2types.Delete{
-			Objects: ids,
-		},
-	})
-	return err
-}
-
-type janitorIterator struct {
-	drivers.FileIterator
-	ctx        context.Context
-	cfg        aws.Config
-	bucketName string
-	unloadPath string
-}
-
-func (ci janitorIterator) Close() error {
-	err := ci.FileIterator.Close()
-	if err != nil {
-		return err
-	}
-
-	return cleanPath(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
-}
-
-func (c *Connection) newCfg(ctx context.Context) (aws.Config, error) {
-	var cfg aws.Config
-	var err error
-	if c.config.AllowHostAccess {
-		cfg, err = awsconfig.LoadDefaultConfig(
-			ctx,
-		)
-	} else {
-		cfg, err = awsconfig.LoadDefaultConfig(
-			ctx,
-			awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)),
-		)
-	}
-	if err != nil {
-		return aws.Config{}, err
-	}
-
-	return cfg, nil
-}
-
-func (c *Connection) openBucket(ctx context.Context, conf *sourceProperties, bucket string) (*blob.Bucket, error) {
-	cfg, err := c.newCfg(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	s3client := s3v2.NewFromConfig(cfg)
-	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
-}
-
-func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sourceProperties) (string, error) {
-	if conf.OutputLocation != "" {
-		return conf.OutputLocation, nil
-	} else if conf.WorkGroup != "" {
-		wo, err := client.GetWorkGroup(ctx, &athena.GetWorkGroupInput{
-			WorkGroup: aws.String(conf.WorkGroup),
-		})
-		if err != nil {
-			return "", err
-		}
-		return *wo.WorkGroup.Configuration.ResultConfiguration.OutputLocation, nil
-	}
-
-	return "", errors.New("Athena output location or Athena workgroup is required")
-}
-
-func (c *Connection) unload(ctx context.Context, client *athena.Client, cfg aws.Config, conf *sourceProperties, unloadLocation string) error {
-	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
-
-	resultConfig := &types.ResultConfiguration{
-		OutputLocation: aws.String(conf.OutputLocation),
-	}
-
-	executeParams := &athena.StartQueryExecutionInput{
-		QueryString:         aws.String(finalSQL),
-		ResultConfiguration: resultConfig,
-	}
-
-	if conf.WorkGroup != "" {
-		executeParams = &athena.StartQueryExecutionInput{
-			QueryString: aws.String(finalSQL),
-			WorkGroup:   aws.String(conf.WorkGroup),
-		}
-	}
-
-	athenaExecution, err := client.StartQueryExecution(ctx, executeParams)
-	if err != nil {
-		return err
-	}
-
-	tm := time.NewTimer(5 * time.Minute)
-	defer tm.Stop()
-	for {
-		select {
-		case <-tm.C:
-			fmt.Errorf("Athena ingestion timeout")
-		default:
-			status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
-				QueryExecutionId: athenaExecution.QueryExecutionId,
-			})
-			if err != nil {
-				return err
-			}
-
-			state := status.QueryExecution.Status.State
-
-			if state == types.QueryExecutionStateSucceeded || state == types.QueryExecutionStateCancelled {
-				return nil
-			} else if state == types.QueryExecutionStateFailed {
-				return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
-			}
-		}
-		time.Sleep(time.Second)
-	}
+type configProperties struct {
+	AccessKeyID     string `mapstructure:"aws_access_key_id"`
+	SecretAccessKey string `mapstructure:"aws_secret_access_key"`
+	SessionToken    string `mapstructure:"aws_access_token"`
+	AllowHostAccess bool   `mapstructure:"allow_host_access"`
 }
diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 237dda9dc6e..8b3722cb351 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -4,51 +4,97 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"net/url"
 	"strings"
+	"time"
 
+	"github.com/aws/aws-sdk-go-v2/aws"
+	"github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/service/athena"
+	types2 "github.com/aws/aws-sdk-go-v2/service/athena/types"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/aws/aws-sdk-go-v2/service/s3/types"
 	"github.com/google/uuid"
+	"github.com/mitchellh/mapstructure"
 	"github.com/rilldata/rill/runtime/drivers"
 	rillblob "github.com/rilldata/rill/runtime/drivers/blob"
+	"gocloud.dev/blob"
+	"gocloud.dev/blob/s3blob"
 )
 
-func (c *Connection) Query(ctx context.Context, props map[string]any) (drivers.RowIterator, error) {
+func (c *Connection) Query(_ context.Context, _ map[string]any) (drivers.RowIterator, error) {
 	return nil, fmt.Errorf("not implemented")
 }
 
-func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, opt *drivers.QueryOption, p drivers.Progress) (drivers.FileIterator, error) {
+func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *drivers.QueryOption, _ drivers.Progress) (drivers.FileIterator, error) {
 	conf, err := parseSourceProperties(props)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	cfg, err := c.newCfg(ctx)
+	// Determine AWS region if it is not specified
+	determineAWSRegion := conf.AWSRegion == ""
+
+	awsRegion := conf.AWSRegion
+	if determineAWSRegion {
+		// AWS region is not specified, use 'us-east-1' for now
+		awsRegion = "us-east-1"
+	}
+
+	awsConfig, err := c.awsConfig(ctx, awsRegion)
 	if err != nil {
 		return nil, err
 	}
 
-	client := athena.NewFromConfig(cfg)
+	client := athena.NewFromConfig(awsConfig)
 	outputLocation, err := resolveOutputLocation(ctx, client, conf)
 	if err != nil {
 		return nil, err
 	}
 
-	// ie
-	// outputLocation s3://bucket-name/prefix
-	// unloadLocation s3://bucket-name/prefix/rill-connector-parquet-output-<uuid>
-	// unloadPath prefix/rill-connector-parquet-output-<uuid>
-	unloadFolderName := "parquet_output_" + uuid.New().String()
-	bucketName := strings.Split(strings.TrimPrefix(outputLocation, "s3://"), "/")[0]
-	unloadLocation := strings.TrimRight(outputLocation, "/") + "/" + unloadFolderName
-	unloadPath := strings.TrimPrefix(strings.TrimPrefix(unloadLocation, "s3://"+bucketName), "/")
-	err = c.unload(ctx, client, cfg, conf, unloadLocation)
+	outputURL, err := url.Parse(outputLocation)
+	if err != nil {
+		return nil, err
+	}
+
+	// outputLocation s3://bucket/path
+	// unloadLocation s3://bucket/path/rill_tmp_<uuid>
+	// unloadPath path/rill_tmp_<uuid>
+	unloadFolderName := "rill_tmp_" + uuid.New().String()
+	bucketName := outputURL.Hostname()
+	unloadURL := outputURL.JoinPath(unloadFolderName)
+	unloadLocation := unloadURL.String()
+	unloadPath := strings.TrimPrefix(unloadURL.Path, "/")
+
+	// Determine actual AWS region and update the config if needed
+	if determineAWSRegion {
+		actualRegion, err := getActualAWSRegion(ctx, awsConfig, bucketName)
+		if err != nil {
+			return nil, err
+		}
+
+		if awsRegion != actualRegion {
+			awsRegion = actualRegion
+			awsConfig, err = c.awsConfig(ctx, awsRegion)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	cleanUp := func() error {
+		return deleteObjectsInPrefix(ctx, awsConfig, bucketName, unloadPath)
+	}
+
+	err = c.unload(ctx, client, conf, unloadLocation)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanPath(ctx, cfg, bucketName, unloadPath))
+		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanUp())
 	}
 
-	bucketObj, err := c.openBucket(ctx, conf, bucketName)
+	bucketObj, err := openBucket(ctx, awsConfig, bucketName)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanPath(ctx, cfg, bucketName, unloadPath))
+		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanUp())
 	}
 
 	opts := rillblob.Options{
@@ -57,14 +103,215 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, opt
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanPath(ctx, cfg, bucketName, unloadPath))
+		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanUp())
 	}
 
-	return janitorIterator{
+	return autoDeleteFileIterator{
 		FileIterator: it,
 		ctx:          ctx,
 		unloadPath:   unloadPath,
 		bucketName:   bucketName,
-		cfg:          cfg,
+		cfg:          awsConfig,
 	}, nil
 }
+
+func (c *Connection) awsConfig(ctx context.Context, awsRegion string) (aws.Config, error) {
+	loadOptions := make([]func(*config.LoadOptions) error, 0)
+
+	if awsRegion != "" {
+		loadOptions = append(loadOptions, config.WithDefaultRegion(awsRegion))
+	}
+
+	// If one of the static properties is specified: access key, secret key, or session token, use static credentials,
+	// Else fallback to the SDK's default credential chain (environment, instance, etc) unless AllowHostAccess is false
+	if c.config.AccessKeyID != "" || c.config.SecretAccessKey != "" || c.config.SessionToken != "" {
+		p := credentials.NewStaticCredentialsProvider(c.config.AccessKeyID, c.config.SecretAccessKey, c.config.SessionToken)
+		loadOptions = append(loadOptions, config.WithCredentialsProvider(p))
+	} else if !c.config.AllowHostAccess {
+		return aws.Config{}, fmt.Errorf("static creds are not provided, and host access is not allowed")
+	}
+
+	return config.LoadDefaultConfig(ctx, loadOptions...)
+}
+
+func (c *Connection) unload(ctx context.Context, client *athena.Client, conf *sourceProperties, unloadLocation string) error {
+	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
+
+	executeParams := &athena.StartQueryExecutionInput{
+		QueryString: aws.String(finalSQL),
+	}
+
+	// If output_location is set, use it and don't set workgroup because the workgroup can override the output location
+	// Otherwise use specified workgroup or the "primary" workgroup
+	// see https://docs.aws.amazon.com/athena/latest/ug/querying.html
+	if conf.OutputLocation != "" {
+		executeParams.ResultConfiguration = &types2.ResultConfiguration{
+			OutputLocation: aws.String(conf.OutputLocation),
+		}
+	} else {
+		workgroup := conf.Workgroup
+		if workgroup == "" {
+			// fallback to "primary" (default) workgroup if no workgroup is specified
+			workgroup = "primary"
+		}
+		executeParams.WorkGroup = aws.String(workgroup)
+	}
+
+	queryExecutionOutput, err := client.StartQueryExecution(ctx, executeParams)
+	if err != nil {
+		return err
+	}
+
+	tm := time.NewTimer(5 * time.Minute)
+	defer tm.Stop()
+	for {
+		select {
+		case <-tm.C:
+			return fmt.Errorf("Athena ingestion timed out")
+		default:
+			status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
+				QueryExecutionId: queryExecutionOutput.QueryExecutionId,
+			})
+			if err != nil {
+				return err
+			}
+
+			switch status.QueryExecution.Status.State {
+			case types2.QueryExecutionStateSucceeded, types2.QueryExecutionStateCancelled:
+				return nil
+			case types2.QueryExecutionStateFailed:
+				return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
+			}
+		}
+		time.Sleep(time.Second)
+	}
+}
+
+func parseSourceProperties(props map[string]any) (*sourceProperties, error) {
+	conf := &sourceProperties{}
+	err := mapstructure.Decode(props, conf)
+	if err != nil {
+		return nil, err
+	}
+
+	return conf, nil
+}
+
+func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sourceProperties) (string, error) {
+	if conf.OutputLocation != "" {
+		return conf.OutputLocation, nil
+	}
+
+	workgroup := conf.Workgroup
+	// fallback to "primary" (default) workgroup if no workgroup is specified
+	if workgroup == "" {
+		workgroup = "primary"
+	}
+
+	wo, err := client.GetWorkGroup(ctx, &athena.GetWorkGroupInput{
+		WorkGroup: aws.String(workgroup),
+	})
+	if err != nil {
+		return "", err
+	}
+
+	resultConfiguration := wo.WorkGroup.Configuration.ResultConfiguration
+	if resultConfiguration != nil && resultConfiguration.OutputLocation != nil && *resultConfiguration.OutputLocation != "" {
+		return *resultConfiguration.OutputLocation, nil
+	}
+
+	return "", fmt.Errorf("either output_location or workgroup with an output location must be set")
+}
+
+func getActualAWSRegion(ctx context.Context, awsConfig aws.Config, bucketName string) (string, error) {
+	s3client := s3.NewFromConfig(awsConfig)
+
+	resp, err := s3client.GetBucketLocation(ctx, &s3.GetBucketLocationInput{
+		Bucket: &bucketName,
+	})
+	if err != nil {
+		return "", err
+	}
+
+	actualRegion := string(resp.LocationConstraint)
+	if actualRegion == "" { // For US East (N. Virginia) region
+		actualRegion = "us-east-1"
+	}
+	return actualRegion, nil
+}
+
+func openBucket(ctx context.Context, cfg aws.Config, bucket string) (*blob.Bucket, error) {
+	s3client := s3.NewFromConfig(cfg)
+	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)
+}
+
+func deleteObjectsInPrefix(ctx context.Context, cfg aws.Config, bucketName, prefix string) error {
+	s3client := s3.NewFromConfig(cfg)
+
+	deleteBatch := func(objects []types.ObjectIdentifier) error {
+		_, err := s3client.DeleteObjects(ctx, &s3.DeleteObjectsInput{
+			Bucket: &bucketName,
+			Delete: &types.Delete{
+				Objects: objects,
+			},
+		})
+		return err
+	}
+
+	var continuationToken *string
+	for {
+		out, err := s3client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{
+			Bucket:            &bucketName,
+			Prefix:            &prefix,
+			ContinuationToken: continuationToken,
+		})
+		if err != nil {
+			return err
+		}
+
+		ids := make([]types.ObjectIdentifier, 0, len(out.Contents))
+		for _, o := range out.Contents {
+			ids = append(ids, types.ObjectIdentifier{
+				Key: o.Key,
+			})
+		}
+
+		if len(ids) > 0 {
+			if err := deleteBatch(ids); err != nil {
+				return err
+			}
+		}
+
+		if out.IsTruncated {
+			continuationToken = out.NextContinuationToken
+		} else {
+			break
+		}
+	}
+
+	return nil
+}
+
+type sourceProperties struct {
+	SQL            string `mapstructure:"sql"`
+	OutputLocation string `mapstructure:"output_location"`
+	Workgroup      string `mapstructure:"workgroup"`
+	AWSRegion      string `mapstructure:"region"`
+}
+
+type autoDeleteFileIterator struct {
+	drivers.FileIterator
+	ctx        context.Context
+	cfg        aws.Config
+	bucketName string
+	unloadPath string
+}
+
+func (ci autoDeleteFileIterator) Close() error {
+	err := ci.FileIterator.Close()
+	if err != nil {
+		return err
+	}
+
+	return deleteObjectsInPrefix(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
+}
diff --git a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
index 073a5cb5a42..813c8cfd355 100644
--- a/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/filestore_to_duckDB.go
@@ -59,7 +59,7 @@ func (t *fileStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps ma
 	}
 
 	// Ingest data
-	from, err := sourceReader(localPaths, format, srcCfg.DuckDB, false)
+	from, err := sourceReader(localPaths, format, srcCfg.DuckDB)
 	if err != nil {
 		return err
 	}
diff --git a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
index f5ebbd5c4df..d2121bd64ed 100644
--- a/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/objectStore_to_duckDB.go
@@ -96,7 +96,7 @@ func (t *objectStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps
 				return err
 			}
 		} else {
-			from, err := sourceReader(files, format, srcCfg.DuckDB, false)
+			from, err := sourceReader(files, format, srcCfg.DuckDB)
 			if err != nil {
 				return err
 			}
@@ -136,7 +136,7 @@ func newAppender(to drivers.OLAPStore, sink *sinkProperties, ingestionProps map[
 }
 
 func (a *appender) appendData(ctx context.Context, files []string, format string) error {
-	from, err := sourceReader(files, format, a.ingestionProps, false)
+	from, err := sourceReader(files, format, a.ingestionProps)
 	if err != nil {
 		return err
 	}
diff --git a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
index b53cbf26def..58498509ee5 100644
--- a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
@@ -69,7 +69,13 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map
 		}
 
 		format := fileutil.FullExt(files[0])
-		from, err := sourceReader(files, format, make(map[string]any), fromAthena)
+		if fromAthena {
+			// Athena doesn't specify ".parquet" extension in output file names
+			// Append ".parquet" extension to the extension generated by Athena
+			format += ".parquet"
+		}
+
+		from, err := sourceReader(files, format, make(map[string]any))
 		if err != nil {
 			return err
 		}
diff --git a/runtime/drivers/duckdb/transporter/utils.go b/runtime/drivers/duckdb/transporter/utils.go
index c7b56fdd1dc..51c9acf73c0 100644
--- a/runtime/drivers/duckdb/transporter/utils.go
+++ b/runtime/drivers/duckdb/transporter/utils.go
@@ -98,12 +98,12 @@ func parseFileSourceProperties(props map[string]any) (*fileSourceProperties, err
 	return cfg, nil
 }
 
-func sourceReader(paths []string, format string, ingestionProps map[string]any, fromAthena bool) (string, error) {
+func sourceReader(paths []string, format string, ingestionProps map[string]any) (string, error) {
 	// Generate a "read" statement
 	if containsAny(format, []string{".csv", ".tsv", ".txt"}) {
 		// CSV reader
 		return generateReadCsvStatement(paths, ingestionProps)
-	} else if strings.Contains(format, ".parquet") || fromAthena {
+	} else if strings.Contains(format, ".parquet") {
 		// Parquet reader
 		return generateReadParquetStatement(paths, ingestionProps)
 	} else if containsAny(format, []string{".json", ".ndjson"}) {
diff --git a/runtime/services/catalog/artifacts/yaml/objects.go b/runtime/services/catalog/artifacts/yaml/objects.go
index 7078a118539..eb795c2a199 100644
--- a/runtime/services/catalog/artifacts/yaml/objects.go
+++ b/runtime/services/catalog/artifacts/yaml/objects.go
@@ -45,8 +45,8 @@ type Source struct {
 	SQL                         string         `yaml:"sql,omitempty" mapstructure:"sql,omitempty"`
 	DB                          string         `yaml:"db,omitempty" mapstructure:"db,omitempty"`
 	ProjectID                   string         `yaml:"project_id,omitempty" mapstructure:"project_id,omitempty"`
-	AthenaOutputLocation        string         `yaml:"athena_output_location,omitempty" mapstructure:"athena_output_location,omitempty"`
-	AthenaWorkgroup             string         `yaml:"athena_workgroup,omitempty" mapstructure:"athena_workgroup,omitempty"`
+	AthenaOutputLocation        string         `yaml:"output_location,omitempty" mapstructure:"output_location,omitempty"`
+	AthenaWorkgroup             string         `yaml:"workgroup,omitempty" mapstructure:"workgroup,omitempty"`
 }
 
 type MetricsView struct {
@@ -210,11 +210,11 @@ func fromSourceArtifact(source *Source, path string) (*drivers.CatalogEntry, err
 	}
 
 	if source.AthenaOutputLocation != "" {
-		props["athena_output_location"] = source.AthenaOutputLocation
+		props["output_location"] = source.AthenaOutputLocation
 	}
 
 	if source.AthenaWorkgroup != "" {
-		props["athena_workgroup"] = source.AthenaWorkgroup
+		props["workgroup"] = source.AthenaWorkgroup
 	}
 
 	propsPB, err := structpb.NewStruct(props)
diff --git a/runtime/services/catalog/migrator/sources/sources.go b/runtime/services/catalog/migrator/sources/sources.go
index 570aa809931..c805b2c3374 100644
--- a/runtime/services/catalog/migrator/sources/sources.go
+++ b/runtime/services/catalog/migrator/sources/sources.go
@@ -376,11 +376,7 @@ func connectorVariables(src *runtimev1.Source, env map[string]string, repoRoot s
 		"allow_host_access": strings.EqualFold(env["allow_host_access"], "true"),
 	}
 	switch connector {
-	case "s3":
-		vars["aws_access_key_id"] = env["aws_access_key_id"]
-		vars["aws_secret_access_key"] = env["aws_secret_access_key"]
-		vars["aws_session_token"] = env["aws_session_token"]
-	case "athena":
+	case "s3", "athena":
 		vars["aws_access_key_id"] = env["aws_access_key_id"]
 		vars["aws_secret_access_key"] = env["aws_secret_access_key"]
 		vars["aws_session_token"] = env["aws_session_token"]
diff --git a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
index 148e8f66596..2fac24dca9b 100644
--- a/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
+++ b/web-common/src/features/sources/modal/submitRemoteSourceForm.ts
@@ -64,8 +64,8 @@ export async function submitRemoteSourceForm(
     Object.entries(values).map(([key, value]) => {
       switch (key) {
         case "project_id":
-        case "athena_output_location":
-        case "athena_workgroup":
+        case "output_location":
+        case "workgroup":
           return [key, value];
         default:
           return [fromYupFriendlyKey(key), value];
diff --git a/web-common/src/features/sources/modal/yupSchemas.ts b/web-common/src/features/sources/modal/yupSchemas.ts
index 9474266edba..4dd2fc46280 100644
--- a/web-common/src/features/sources/modal/yupSchemas.ts
+++ b/web-common/src/features/sources/modal/yupSchemas.ts
@@ -79,8 +79,8 @@ export function getYupSchema(connector: V1ConnectorSpec) {
             "Source name must start with a letter or underscore and contain only letters, numbers, and underscores"
           )
           .required("Source name is required"),
-        athena_output_location: yup.string().required(),
-        athena_workgroup: yup.string(),
+        output_location: yup.string(),
+        workgroup: yup.string(),
       });
 
     default:

From 28f3e6ec5116797e882ac64a236f9baf3c6b5396 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Sun, 24 Sep 2023 00:05:56 +0300
Subject: [PATCH 29/40] Athena icon

---
 web-common/src/features/sources/modal/AddSourceModal.svelte | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/web-common/src/features/sources/modal/AddSourceModal.svelte b/web-common/src/features/sources/modal/AddSourceModal.svelte
index 03e9ef0a3a3..308ecb7a59a 100644
--- a/web-common/src/features/sources/modal/AddSourceModal.svelte
+++ b/web-common/src/features/sources/modal/AddSourceModal.svelte
@@ -22,6 +22,7 @@
   import LocalSourceUpload from "./LocalSourceUpload.svelte";
   import RemoteSourceForm from "./RemoteSourceForm.svelte";
   import RequestConnectorForm from "./RequestConnectorForm.svelte";
+  import AmazonAthena from "@rilldata/web-common/components/icons/connectors/AmazonAthena.svelte";
 
   export let open: boolean;
 
@@ -48,7 +49,7 @@
     // azure_blob_storage: MicrosoftAzureBlobStorage,
     // duckdb: DuckDB,
     bigquery: GoogleBigQuery,
-    // athena: AmazonAthena,
+    athena: AmazonAthena,
     motherduck: MotherDuck,
     // postgres: Postgres,
     local_file: LocalFile,

From 18b423dfc0aabe252e0433523334e83463707d7c Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 16:27:26 +0300
Subject: [PATCH 30/40] Removed the auto-resolving of AWS region

---
 runtime/drivers/athena/athena.go    |  2 ++
 runtime/drivers/athena/sql_store.go | 53 ++++-------------------------
 2 files changed, 8 insertions(+), 47 deletions(-)

diff --git a/runtime/drivers/athena/athena.go b/runtime/drivers/athena/athena.go
index 6aaeb92c108..76a7b470c59 100644
--- a/runtime/drivers/athena/athena.go
+++ b/runtime/drivers/athena/athena.go
@@ -40,6 +40,7 @@ var spec = drivers.Spec{
 			Key:         "workgroup",
 			DisplayName: "AWS Athena workgroup",
 			Description: "AWS Athena workgroup to use for queries.",
+			Placeholder: "primary",
 			Type:        drivers.StringPropertyType,
 			Required:    false,
 		},
@@ -47,6 +48,7 @@ var spec = drivers.Spec{
 			Key:         "region",
 			DisplayName: "AWS region",
 			Description: "AWS region to connect to Athena and the output location.",
+			Placeholder: "us-east-1",
 			Type:        drivers.StringPropertyType,
 			Required:    false,
 		},
diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 8b3722cb351..abbb26c4257 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -33,16 +33,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 		return nil, fmt.Errorf("failed to parse config: %w", err)
 	}
 
-	// Determine AWS region if it is not specified
-	determineAWSRegion := conf.AWSRegion == ""
-
-	awsRegion := conf.AWSRegion
-	if determineAWSRegion {
-		// AWS region is not specified, use 'us-east-1' for now
-		awsRegion = "us-east-1"
-	}
-
-	awsConfig, err := c.awsConfig(ctx, awsRegion)
+	awsConfig, err := c.awsConfig(ctx, conf.AWSRegion)
 	if err != nil {
 		return nil, err
 	}
@@ -67,22 +58,6 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 	unloadLocation := unloadURL.String()
 	unloadPath := strings.TrimPrefix(unloadURL.Path, "/")
 
-	// Determine actual AWS region and update the config if needed
-	if determineAWSRegion {
-		actualRegion, err := getActualAWSRegion(ctx, awsConfig, bucketName)
-		if err != nil {
-			return nil, err
-		}
-
-		if awsRegion != actualRegion {
-			awsRegion = actualRegion
-			awsConfig, err = c.awsConfig(ctx, awsRegion)
-			if err != nil {
-				return nil, err
-			}
-		}
-	}
-
 	cleanUp := func() error {
 		return deleteObjectsInPrefix(ctx, awsConfig, bucketName, unloadPath)
 	}
@@ -116,10 +91,11 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 }
 
 func (c *Connection) awsConfig(ctx context.Context, awsRegion string) (aws.Config, error) {
-	loadOptions := make([]func(*config.LoadOptions) error, 0)
-
-	if awsRegion != "" {
-		loadOptions = append(loadOptions, config.WithDefaultRegion(awsRegion))
+	loadOptions := []func(*config.LoadOptions) error{
+		// Setting the default region to an empty string, will result in the default region value being ignored
+		config.WithDefaultRegion("us-east-1"),
+		// Setting the region to an empty string, will result in the region value being ignored
+		config.WithRegion(awsRegion),
 	}
 
 	// If one of the static properties is specified: access key, secret key, or session token, use static credentials,
@@ -223,23 +199,6 @@ func resolveOutputLocation(ctx context.Context, client *athena.Client, conf *sou
 	return "", fmt.Errorf("either output_location or workgroup with an output location must be set")
 }
 
-func getActualAWSRegion(ctx context.Context, awsConfig aws.Config, bucketName string) (string, error) {
-	s3client := s3.NewFromConfig(awsConfig)
-
-	resp, err := s3client.GetBucketLocation(ctx, &s3.GetBucketLocationInput{
-		Bucket: &bucketName,
-	})
-	if err != nil {
-		return "", err
-	}
-
-	actualRegion := string(resp.LocationConstraint)
-	if actualRegion == "" { // For US East (N. Virginia) region
-		actualRegion = "us-east-1"
-	}
-	return actualRegion, nil
-}
-
 func openBucket(ctx context.Context, cfg aws.Config, bucket string) (*blob.Bucket, error) {
 	s3client := s3.NewFromConfig(cfg)
 	return s3blob.OpenBucketV2(ctx, s3client, bucket, nil)

From 679e85dfe53205845399a4b1adad11d3ac371a83 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 16:39:06 +0300
Subject: [PATCH 31/40] Simplified a clean-up process

---
 runtime/drivers/athena/sql_store.go | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index abbb26c4257..175a5777bed 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -58,18 +58,18 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 	unloadLocation := unloadURL.String()
 	unloadPath := strings.TrimPrefix(unloadURL.Path, "/")
 
-	cleanUp := func() error {
+	cleanupFn := func() error {
 		return deleteObjectsInPrefix(ctx, awsConfig, bucketName, unloadPath)
 	}
 
 	err = c.unload(ctx, client, conf, unloadLocation)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanUp())
+		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanupFn())
 	}
 
 	bucketObj, err := openBucket(ctx, awsConfig, bucketName)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanUp())
+		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanupFn())
 	}
 
 	opts := rillblob.Options{
@@ -78,15 +78,12 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanUp())
+		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanupFn())
 	}
 
 	return autoDeleteFileIterator{
 		FileIterator: it,
-		ctx:          ctx,
-		unloadPath:   unloadPath,
-		bucketName:   bucketName,
-		cfg:          awsConfig,
+		cleanupFn:    cleanupFn,
 	}, nil
 }
 
@@ -260,17 +257,14 @@ type sourceProperties struct {
 
 type autoDeleteFileIterator struct {
 	drivers.FileIterator
-	ctx        context.Context
-	cfg        aws.Config
-	bucketName string
-	unloadPath string
+	cleanupFn func() error
 }
 
-func (ci autoDeleteFileIterator) Close() error {
-	err := ci.FileIterator.Close()
+func (i autoDeleteFileIterator) Close() error {
+	err := i.FileIterator.Close()
 	if err != nil {
 		return err
 	}
 
-	return deleteObjectsInPrefix(ci.ctx, ci.cfg, ci.bucketName, ci.unloadPath)
+	return i.cleanupFn()
 }

From 3ab245d009fceb20e7d0f4e7ed757fedb175eb36 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 18:10:06 +0300
Subject: [PATCH 32/40] Updated according to previously merged changes

---
 runtime/drivers/athena/sql_store.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 175a5777bed..fcc6cafcbc0 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -24,7 +24,7 @@ import (
 )
 
 func (c *Connection) Query(_ context.Context, _ map[string]any) (drivers.RowIterator, error) {
-	return nil, fmt.Errorf("not implemented")
+	return nil, drivers.ErrNotImplemented
 }
 
 func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *drivers.QueryOption, _ drivers.Progress) (drivers.FileIterator, error) {

From 804de57c40b68f82ee8bd2c139e7f4d788f4ff95 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 22:07:57 +0300
Subject: [PATCH 33/40] Dash vs underscore

---
 runtime/drivers/athena/sql_store.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index fcc6cafcbc0..4ef2d2228c1 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -50,9 +50,9 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 	}
 
 	// outputLocation s3://bucket/path
-	// unloadLocation s3://bucket/path/rill_tmp_<uuid>
-	// unloadPath path/rill_tmp_<uuid>
-	unloadFolderName := "rill_tmp_" + uuid.New().String()
+	// unloadLocation s3://bucket/path/rill-tmp-<uuid>
+	// unloadPath path/rill-tmp-<uuid>
+	unloadFolderName := "rill-tmp-" + uuid.New().String()
 	bucketName := outputURL.Hostname()
 	unloadURL := outputURL.JoinPath(unloadFolderName)
 	unloadLocation := unloadURL.String()

From 591a4efdca5ee0cf48f5b03ec4fd58c0d178121d Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 22:08:33 +0300
Subject: [PATCH 34/40] A new line after a query

---
 runtime/drivers/athena/sql_store.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 4ef2d2228c1..c05d0b4964a 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -108,7 +108,7 @@ func (c *Connection) awsConfig(ctx context.Context, awsRegion string) (aws.Confi
 }
 
 func (c *Connection) unload(ctx context.Context, client *athena.Client, conf *sourceProperties, unloadLocation string) error {
-	finalSQL := fmt.Sprintf("UNLOAD (%s) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
+	finalSQL := fmt.Sprintf("UNLOAD (%s\n) TO '%s' WITH (format = 'PARQUET')", conf.SQL, unloadLocation)
 
 	executeParams := &athena.StartQueryExecutionInput{
 		QueryString: aws.String(finalSQL),

From bc7f95d3cd5da77f981e02c74c6c0331cf425c0f Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 22:11:33 +0300
Subject: [PATCH 35/40] Non-nil NextContinuationToken

---
 runtime/drivers/athena/sql_store.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index c05d0b4964a..b06d7fbbd03 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -238,7 +238,7 @@ func deleteObjectsInPrefix(ctx context.Context, cfg aws.Config, bucketName, pref
 			}
 		}
 
-		if out.IsTruncated {
+		if out.IsTruncated && out.NextContinuationToken != nil {
 			continuationToken = out.NextContinuationToken
 		} else {
 			break

From af69414ee185e4925752511bd28158224075cf32 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 22:22:59 +0300
Subject: [PATCH 36/40] ctx cancellation instead of a hardcoded timer

---
 runtime/drivers/athena/sql_store.go | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index b06d7fbbd03..a4b4f3b0259 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -135,12 +135,13 @@ func (c *Connection) unload(ctx context.Context, client *athena.Client, conf *so
 		return err
 	}
 
-	tm := time.NewTimer(5 * time.Minute)
-	defer tm.Stop()
 	for {
 		select {
-		case <-tm.C:
-			return fmt.Errorf("Athena ingestion timed out")
+		case <-ctx.Done():
+			_, err = client.StopQueryExecution(ctx, &athena.StopQueryExecutionInput{
+				QueryExecutionId: queryExecutionOutput.QueryExecutionId,
+			})
+			return errors.Join(ctx.Err(), err)
 		default:
 			status, err := client.GetQueryExecution(ctx, &athena.GetQueryExecutionInput{
 				QueryExecutionId: queryExecutionOutput.QueryExecutionId,
@@ -150,8 +151,10 @@ func (c *Connection) unload(ctx context.Context, client *athena.Client, conf *so
 			}
 
 			switch status.QueryExecution.Status.State {
-			case types2.QueryExecutionStateSucceeded, types2.QueryExecutionStateCancelled:
+			case types2.QueryExecutionStateSucceeded:
 				return nil
+			case types2.QueryExecutionStateCancelled:
+				return fmt.Errorf("Athena query execution cancelled")
 			case types2.QueryExecutionStateFailed:
 				return fmt.Errorf("Athena query execution failed %s", *status.QueryExecution.Status.AthenaError.ErrorMessage)
 			}

From 0bd4d25e2c3a3cb563a7d5cd7cf62fff0b9e7ada Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 22:52:52 +0300
Subject: [PATCH 37/40] Format for FileIterator

---
 runtime/drivers/athena/sql_store.go                      | 1 +
 runtime/drivers/bigquery/sql_store.go                    | 4 ++++
 runtime/drivers/blob/blobdownloader.go                   | 6 ++++++
 runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go | 9 ++-------
 runtime/drivers/duckdb/transporter/transporter_test.go   | 4 ++++
 runtime/drivers/object_store.go                          | 3 +++
 6 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index a4b4f3b0259..968dcaefcc1 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -74,6 +74,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 
 	opts := rillblob.Options{
 		GlobPattern: unloadPath + "/**",
+		Format:      "parquet",
 	}
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
diff --git a/runtime/drivers/bigquery/sql_store.go b/runtime/drivers/bigquery/sql_store.go
index e5b507e3ab1..40d89dd39a3 100644
--- a/runtime/drivers/bigquery/sql_store.go
+++ b/runtime/drivers/bigquery/sql_store.go
@@ -224,6 +224,10 @@ func (f *fileIterator) Size(unit drivers.ProgressUnit) (int64, bool) {
 	}
 }
 
+func (f *fileIterator) Format() string {
+	return ""
+}
+
 func (f *fileIterator) downloadAsJSONFile() error {
 	tf := time.Now()
 	defer func() {
diff --git a/runtime/drivers/blob/blobdownloader.go b/runtime/drivers/blob/blobdownloader.go
index 405ea07fa55..b1e82ffd7d9 100644
--- a/runtime/drivers/blob/blobdownloader.go
+++ b/runtime/drivers/blob/blobdownloader.go
@@ -65,6 +65,8 @@ type Options struct {
 	StorageLimitInBytes int64
 	// Retain files and only delete during close
 	KeepFilesUntilClose bool
+	// General blob format (json, csv, parquet, etc)
+	Format string
 }
 
 // sets defaults if not set by user
@@ -373,6 +375,10 @@ func (it *blobIterator) KeepFilesUntilClose(keepFilesUntilClose bool) {
 	it.opts.KeepFilesUntilClose = keepFilesUntilClose
 }
 
+func (it *blobIterator) Format() string {
+	return it.opts.Format
+}
+
 // todo :: ideally planner should take ownership of the bucket and return an iterator with next returning objectWithPlan
 func (it *blobIterator) plan() ([]*objectWithPlan, error) {
 	var (
diff --git a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
index a9e71491e40..48720820478 100644
--- a/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
+++ b/runtime/drivers/duckdb/transporter/sqlstore_to_duckDB.go
@@ -7,13 +7,11 @@ import (
 	"errors"
 	"fmt"
 	"math"
-	"reflect"
 	"time"
 
 	"github.com/marcboeker/go-duckdb"
 	runtimev1 "github.com/rilldata/rill/proto/gen/rill/runtime/v1"
 	"github.com/rilldata/rill/runtime/drivers"
-	"github.com/rilldata/rill/runtime/drivers/athena"
 	"github.com/rilldata/rill/runtime/pkg/fileutil"
 	"github.com/rilldata/rill/runtime/pkg/observability"
 	"go.uber.org/zap"
@@ -78,7 +76,6 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map
 	// TODO :: iteration over fileiterator is similar(apart from no schema changes possible here)
 	// to consuming fileIterator in objectStore_to_duckDB
 	// both can be refactored to follow same path
-	fromAthena := reflect.TypeOf(s.from).AssignableTo(reflect.TypeOf(&athena.Connection{}))
 	for iter.HasNext() {
 		files, err := iter.NextBatch(_sqlStoreIteratorBatchSize)
 		if err != nil {
@@ -86,10 +83,8 @@ func (s *sqlStoreToDuckDB) Transfer(ctx context.Context, srcProps, sinkProps map
 		}
 
 		format := fileutil.FullExt(files[0])
-		if fromAthena {
-			// Athena doesn't specify ".parquet" extension in output file names
-			// Append ".parquet" extension to the extension generated by Athena
-			format += ".parquet"
+		if iter.Format() != "" {
+			format += "." + iter.Format()
 		}
 
 		from, err := sourceReader(files, format, make(map[string]any))
diff --git a/runtime/drivers/duckdb/transporter/transporter_test.go b/runtime/drivers/duckdb/transporter/transporter_test.go
index c82193bff8d..65855373be3 100644
--- a/runtime/drivers/duckdb/transporter/transporter_test.go
+++ b/runtime/drivers/duckdb/transporter/transporter_test.go
@@ -53,6 +53,10 @@ func (m *mockIterator) Size(unit drivers.ProgressUnit) (int64, bool) {
 func (m *mockIterator) KeepFilesUntilClose(keepFilesUntilClose bool) {
 }
 
+func (m *mockIterator) Format() string {
+	return ""
+}
+
 var _ drivers.FileIterator = &mockIterator{}
 
 func TestIterativeCSVIngestionWithVariableSchema(t *testing.T) {
diff --git a/runtime/drivers/object_store.go b/runtime/drivers/object_store.go
index 48714fb8573..bb65d3b1904 100644
--- a/runtime/drivers/object_store.go
+++ b/runtime/drivers/object_store.go
@@ -27,4 +27,7 @@ type FileIterator interface {
 	// KeepFilesUntilClose marks the iterator to keep the files until close is called.
 	// This is used when the entire list of files is used at once in certain cases.
 	KeepFilesUntilClose(keepFilesUntilClose bool)
+	// Format returns the general file format (json, csv, parquet, etc.).
+	// It returns an empty string if there is no general format.
+	Format() string
 }

From a0d18da0df0440974555312c7bf2fe94165a870c Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 23:18:04 +0300
Subject: [PATCH 38/40] deferred cleanupFn()

---
 runtime/drivers/athena/sql_store.go | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 968dcaefcc1..7c882bbe363 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -27,7 +27,7 @@ func (c *Connection) Query(_ context.Context, _ map[string]any) (drivers.RowIter
 	return nil, drivers.ErrNotImplemented
 }
 
-func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *drivers.QueryOption, _ drivers.Progress) (drivers.FileIterator, error) {
+func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *drivers.QueryOption, _ drivers.Progress) (outIt drivers.FileIterator, outErr error) {
 	conf, err := parseSourceProperties(props)
 	if err != nil {
 		return nil, fmt.Errorf("failed to parse config: %w", err)
@@ -64,12 +64,26 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 
 	err = c.unload(ctx, client, conf, unloadLocation)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to unload: %w", err), cleanupFn())
+		unloadErr := fmt.Errorf("failed to unload: %w", err)
+		cleanupErr := cleanupFn()
+		if cleanupErr != nil {
+			cleanupErr = fmt.Errorf("cleanup error: %w", cleanupErr)
+		}
+		return nil, errors.Join(unloadErr, cleanupErr)
 	}
 
+	defer func() {
+		if outErr != nil {
+			cleanupErr := cleanupFn()
+			if cleanupErr != nil {
+				outErr = errors.Join(outErr, fmt.Errorf("cleanup error: %w", cleanupErr))
+			}
+		}
+	}()
+
 	bucketObj, err := openBucket(ctx, awsConfig, bucketName)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot open bucket %q: %w", bucketName, err), cleanupFn())
+		return nil, fmt.Errorf("cannot open bucket %q: %w", bucketName, err)
 	}
 
 	opts := rillblob.Options{
@@ -79,7 +93,7 @@ func (c *Connection) QueryAsFiles(ctx context.Context, props map[string]any, _ *
 
 	it, err := rillblob.NewIterator(ctx, bucketObj, opts, c.logger)
 	if err != nil {
-		return nil, errors.Join(fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err), cleanupFn())
+		return nil, fmt.Errorf("cannot download parquet output %q %w", opts.GlobPattern, err)
 	}
 
 	return autoDeleteFileIterator{

From a1d0eb946a57af489830966e901f944bf2b6c827 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 23:28:45 +0300
Subject: [PATCH 39/40] Aligned Athena query with a source config

---
 runtime/drivers/athena/sql_store.go | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/runtime/drivers/athena/sql_store.go b/runtime/drivers/athena/sql_store.go
index 7c882bbe363..32b533961dc 100644
--- a/runtime/drivers/athena/sql_store.go
+++ b/runtime/drivers/athena/sql_store.go
@@ -129,20 +129,14 @@ func (c *Connection) unload(ctx context.Context, client *athena.Client, conf *so
 		QueryString: aws.String(finalSQL),
 	}
 
-	// If output_location is set, use it and don't set workgroup because the workgroup can override the output location
-	// Otherwise use specified workgroup or the "primary" workgroup
-	// see https://docs.aws.amazon.com/athena/latest/ug/querying.html
 	if conf.OutputLocation != "" {
 		executeParams.ResultConfiguration = &types2.ResultConfiguration{
 			OutputLocation: aws.String(conf.OutputLocation),
 		}
-	} else {
-		workgroup := conf.Workgroup
-		if workgroup == "" {
-			// fallback to "primary" (default) workgroup if no workgroup is specified
-			workgroup = "primary"
-		}
-		executeParams.WorkGroup = aws.String(workgroup)
+	}
+
+	if conf.Workgroup != "" { // primary is used if nothing is set
+		executeParams.WorkGroup = aws.String(conf.Workgroup)
 	}
 
 	queryExecutionOutput, err := client.StartQueryExecution(ctx, executeParams)

From 480aa951c34a84a5d457a66932f49f66cd4cc676 Mon Sep 17 00:00:00 2001
From: "e.sevastyanov" <eugene.sevastianov@rilldata.com>
Date: Mon, 25 Sep 2023 23:41:25 +0300
Subject: [PATCH 40/40] Fixed a merge conflict

---
 runtime/drivers/blob/blobdownloader.go | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/runtime/drivers/blob/blobdownloader.go b/runtime/drivers/blob/blobdownloader.go
index bfa33219838..5674ba7b913 100644
--- a/runtime/drivers/blob/blobdownloader.go
+++ b/runtime/drivers/blob/blobdownloader.go
@@ -233,11 +233,7 @@ func (it *blobIterator) Next() ([]string, error) {
 		return nil, io.EOF
 	}
 
-	func (it *blobIterator) Format() string {
-	return it.opts.Format
-}
-
-// Track the batch for cleanup in the next iteration
+	// Track the batch for cleanup in the next iteration
 	it.lastBatch = batch
 
 	// Clients may change the slice. Creating a copy to ensure we delete the files on next batch/close.
@@ -246,6 +242,10 @@ func (it *blobIterator) Next() ([]string, error) {
 	return result, nil
 }
 
+func (it *blobIterator) Format() string {
+	return it.opts.Format
+}
+
 // TODO: Ideally planner should take ownership of the bucket and return an iterator with next returning objectWithPlan
 func (it *blobIterator) plan() ([]*objectWithPlan, error) {
 	var (
@@ -469,6 +469,10 @@ func (it *prefetchedIterator) Next() ([]string, error) {
 	return it.batch, nil
 }
 
+func (it *prefetchedIterator) Format() string {
+	return it.underlying.Format()
+}
+
 // downloadResult represents a successfully downloaded file
 type downloadResult struct {
 	path  string