From 5c222f942df689659ed67f22026302df8d8b02e9 Mon Sep 17 00:00:00 2001 From: Max Lapan Date: Fri, 10 Nov 2023 09:27:43 +0100 Subject: [PATCH] Access to public S3 buckets without credentials (#287) * Implement aws anonymous mode * Pk fix * Fix external refs * Unit test * Documentation about public bucket access * Release date * Scala formatting * Fix changes file * Typo Co-authored-by: Christoph Pirkl * Update doc/user_guide/user_guide.md Co-authored-by: Christoph Pirkl --------- Co-authored-by: Christoph Pirkl --- doc/changes/changelog.md | 1 + doc/changes/changes_2.7.8.md | 11 +++++++++ doc/user_guide/user_guide.md | 24 +++++++++++-------- pk_generated_parent.pom | 2 +- pom.xml | 4 ++-- .../com/exasol/cloudetl/bucket/S3Bucket.scala | 22 ++++++++++++----- .../exasol/cloudetl/bucket/S3BucketTest.scala | 9 +++++++ 7 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 doc/changes/changes_2.7.8.md diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md index 847523c8..435ecf0b 100644 --- a/doc/changes/changelog.md +++ b/doc/changes/changelog.md @@ -1,5 +1,6 @@ # Changes +* [2.7.8](changes_2.7.8.md) * [2.7.7](changes_2.7.7.md) * [2.7.6](changes_2.7.6.md) * [2.7.5](changes_2.7.5.md) diff --git a/doc/changes/changes_2.7.8.md b/doc/changes/changes_2.7.8.md new file mode 100644 index 00000000..b8b36165 --- /dev/null +++ b/doc/changes/changes_2.7.8.md @@ -0,0 +1,11 @@ +# Cloud Storage Extension 2.7.8, released 2023-11-10 + +Code name: Access to public S3 buckets without credentials + +## Summary + +Implemented an option to access public S3 buckets without credentials. + +## Features + +* #283: Support publicly available S3 buckets without credentials diff --git a/doc/user_guide/user_guide.md b/doc/user_guide/user_guide.md index 0fe73d22..4fa22859 100644 --- a/doc/user_guide/user_guide.md +++ b/doc/user_guide/user_guide.md @@ -150,7 +150,7 @@ downloaded jar file is the same as the checksum provided in the releases. 
To check the SHA256 result of the local jar, run the command: ```sh -sha256sum exasol-cloud-storage-extension-2.7.7.jar +sha256sum exasol-cloud-storage-extension-2.7.8.jar ``` ### Building From Source @@ -180,7 +180,7 @@ mvn clean package -DskipTests=true ``` The assembled jar file should be located at -`target/exasol-cloud-storage-extension-2.7.7.jar`. +`target/exasol-cloud-storage-extension-2.7.8.jar`. ### Create an Exasol Bucket @@ -202,7 +202,7 @@ for the HTTP protocol. Upload the jar file using curl command: ```sh -curl -X PUT -T exasol-cloud-storage-extension-2.7.7.jar \ +curl -X PUT -T exasol-cloud-storage-extension-2.7.8.jar \ http://w:@exasol.datanode.domain.com:2580// ``` @@ -234,7 +234,7 @@ OPEN SCHEMA CLOUD_STORAGE_EXTENSION; CREATE OR REPLACE JAVA SET SCRIPT IMPORT_PATH(...) EMITS (...) AS %scriptclass com.exasol.cloudetl.scriptclasses.FilesImportQueryGenerator; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / CREATE OR REPLACE JAVA SCALAR SCRIPT IMPORT_METADATA(...) EMITS ( @@ -244,12 +244,12 @@ CREATE OR REPLACE JAVA SCALAR SCRIPT IMPORT_METADATA(...) EMITS ( end_index DECIMAL(36, 0) ) AS %scriptclass com.exasol.cloudetl.scriptclasses.FilesMetadataReader; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / CREATE OR REPLACE JAVA SET SCRIPT IMPORT_FILES(...) EMITS (...) AS %scriptclass com.exasol.cloudetl.scriptclasses.FilesDataImporter; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / ``` @@ -268,12 +268,12 @@ OPEN SCHEMA CLOUD_STORAGE_EXTENSION; CREATE OR REPLACE JAVA SET SCRIPT EXPORT_PATH(...) EMITS (...) 
AS %scriptclass com.exasol.cloudetl.scriptclasses.TableExportQueryGenerator; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / CREATE OR REPLACE JAVA SET SCRIPT EXPORT_TABLE(...) EMITS (ROWS_AFFECTED INT) AS %scriptclass com.exasol.cloudetl.scriptclasses.TableDataExporter; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / ``` @@ -407,13 +407,13 @@ CREATE OR REPLACE JAVA SCALAR SCRIPT IMPORT_METADATA(...) EMITS ( ) AS %jvmoption -DHTTPS_PROXY=http://username:password@10.10.1.10:1180 %scriptclass com.exasol.cloudetl.scriptclasses.FilesMetadataReader; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / CREATE OR REPLACE JAVA SET SCRIPT IMPORT_FILES(...) EMITS (...) AS %jvmoption -DHTTPS_PROXY=http://username:password@10.10.1.10:1180 %scriptclass com.exasol.cloudetl.scriptclasses.FilesDataImporter; - %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.7.jar; + %jar /buckets/bfsdefault//exasol-cloud-storage-extension-2.7.8.jar; / ``` @@ -722,6 +722,10 @@ S3_SESSION_TOKEN Please follow the [Amazon credentials management best practices][aws-creds] when creating credentials. +If you are accessing a public bucket, you don't need credentials. In such a case, +you need to set `S3_ACCESS_KEY` and `S3_SECRET_KEY` to empty values: +`S3_ACCESS_KEY=;S3_SECRET_KEY=`. + [aws-creds]: https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html Another required parameter is the S3 endpoint, `S3_ENDPOINT`. 
An endpoint is the diff --git a/pk_generated_parent.pom b/pk_generated_parent.pom index 0e7880f9..a04f78e9 100644 --- a/pk_generated_parent.pom +++ b/pk_generated_parent.pom @@ -3,7 +3,7 @@ 4.0.0 com.exasol cloud-storage-extension-generated-parent - 2.7.7 + 2.7.8 pom UTF-8 diff --git a/pom.xml b/pom.xml index 008e25fa..aed3974d 100644 --- a/pom.xml +++ b/pom.xml @@ -3,14 +3,14 @@ 4.0.0 com.exasol cloud-storage-extension - 2.7.7 + 2.7.8 Cloud Storage Extension Exasol Cloud Storage Import And Export Extension https://github.com/exasol/cloud-storage-extension/ cloud-storage-extension-generated-parent com.exasol - 2.7.7 + 2.7.8 pk_generated_parent.pom diff --git a/src/main/scala/com/exasol/cloudetl/bucket/S3Bucket.scala b/src/main/scala/com/exasol/cloudetl/bucket/S3Bucket.scala index 6ea11cc5..85ea2ca4 100644 --- a/src/main/scala/com/exasol/cloudetl/bucket/S3Bucket.scala +++ b/src/main/scala/com/exasol/cloudetl/bucket/S3Bucket.scala @@ -52,6 +52,9 @@ final case class S3Bucket(path: String, params: StorageProperties) extends Bucke ) } + private[this] def isAnonymousAWSParams(properties: StorageProperties): Boolean = + properties.getString(S3_ACCESS_KEY).isEmpty && properties.getString(S3_SECRET_KEY).isEmpty + /** * @inheritdoc * @@ -83,15 +86,22 @@ final case class S3Bucket(path: String, params: StorageProperties) extends Bucke properties } - conf.set("fs.s3a.access.key", mergedProperties.getString(S3_ACCESS_KEY)) - conf.set("fs.s3a.secret.key", mergedProperties.getString(S3_SECRET_KEY)) - - if (mergedProperties.containsKey(S3_SESSION_TOKEN)) { + if (isAnonymousAWSParams(mergedProperties)) { conf.set( "fs.s3a.aws.credentials.provider", - classOf[org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider].getName() + classOf[org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider].getName() ) - conf.set("fs.s3a.session.token", mergedProperties.getString(S3_SESSION_TOKEN)) + } else { + conf.set("fs.s3a.access.key", mergedProperties.getString(S3_ACCESS_KEY)) + 
conf.set("fs.s3a.secret.key", mergedProperties.getString(S3_SECRET_KEY)) + + if (mergedProperties.containsKey(S3_SESSION_TOKEN)) { + conf.set( + "fs.s3a.aws.credentials.provider", + classOf[org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider].getName() + ) + conf.set("fs.s3a.session.token", mergedProperties.getString(S3_SESSION_TOKEN)) + } } properties.getProxyHost().foreach { proxyHost => diff --git a/src/test/scala/com/exasol/cloudetl/bucket/S3BucketTest.scala b/src/test/scala/com/exasol/cloudetl/bucket/S3BucketTest.scala index 3745cadc..f51e3e11 100644 --- a/src/test/scala/com/exasol/cloudetl/bucket/S3BucketTest.scala +++ b/src/test/scala/com/exasol/cloudetl/bucket/S3BucketTest.scala @@ -62,6 +62,15 @@ class S3BucketTest extends AbstractBucketTest { assertConfigurationProperties(bucket, configMappings - "fs.s3a.session.token") } + test(testName = "apply returns specific credentials provider for public access configuration") { + val exaMetadata = mockConnectionInfo("access", "S3_ACCESS_KEY=;S3_SECRET_KEY=") + val bucket = getBucket(defaultProperties, exaMetadata) + assert( + bucket.getConfiguration().get("fs.s3a.aws.credentials.provider") == + "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" + ) + } + test("apply returns S3Bucket with secret and session token from connection") { val exaMetadata = mockConnectionInfo("access", "S3_SECRET_KEY=secret;S3_SESSION_TOKEN=token") val bucket = getBucket(defaultProperties, exaMetadata)