Object-store test containers improvements (#8258)
* Add JUnit extensions for the Azurite and Fake-GCS testcontainers
* Unify the interfaces/APIs of the object-store testcontainers (see the sketch after this list)
* Further prepare ITSparkIcebergNessieAzure
* Remove the Hadoop dependency from s3minio
* Let Renovate keep Azurite up-to-date
* Fix GcsContainer so downloads work ("external-url", sketched below after the GCS test) and bind the listening port to a single IP
* Rename `:nessie-s3minio` to `:nessie-minio-testcontainer`
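All three object-store containers now expose the same JUnit 5 pattern: register the module's extension, then receive an *Access facade through an annotated parameter or static field. Below is a minimal sketch of the Minio variant, assuming the package and class names of the existing s3minio module; the Azurite and GCS variants in the diffs that follow are analogous.

    import java.net.URI;
    import java.util.Map;
    import org.junit.jupiter.api.Test;
    import org.junit.jupiter.api.extension.ExtendWith;
    import org.projectnessie.minio.Minio;
    import org.projectnessie.minio.MinioAccess;
    import org.projectnessie.minio.MinioExtension;

    @ExtendWith(MinioExtension.class)
    class ExampleObjectStoreTest {

      @Test
      void useBucket(@Minio MinioAccess minio) {
        // Catalog/FileIO settings for Iceberg, ready to putAll() into catalog properties.
        Map<String, String> icebergProps = minio.icebergProperties();
        // Hadoop settings as a plain map; callers copy them into a Configuration themselves.
        Map<String, String> hadoopProps = minio.hadoopConfig();
        // Bucket URI with a caller-chosen prefix.
        URI bucket = minio.s3BucketUri("/my/prefix");
        // ... wire these into the system under test ...
      }
    }

The facades deliberately return a plain Map<String, String> for Hadoop settings, so the testcontainer modules themselves no longer depend on Hadoop; tests that need a Configuration copy the map in, as the diffs below show.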
snazy authored Mar 27, 2024
1 parent b3ba749 commit 834327f
Showing 41 changed files with 1,202 additions and 167 deletions.
1 change: 1 addition & 0 deletions bom/build.gradle.kts
@@ -40,6 +40,7 @@ dependencies {
api(project(":nessie-jaxrs-testextension"))
api(project(":nessie-jaxrs-tests"))
api(project(":nessie-keycloak-testcontainer"))
api(project(":nessie-minio-testcontainer"))
api(project(":nessie-nessie-testcontainer"))
api(project(":nessie-quarkus-auth"))
api(project(":nessie-quarkus-common"))
2 changes: 1 addition & 1 deletion gc/gc-iceberg-files/build.gradle.kts
@@ -58,7 +58,7 @@ dependencies {

testFixturesApi(nessieProject("nessie-gc-base-tests"))
testFixturesApi(nessieProject("nessie-object-storage-mock"))
testFixturesApi(nessieProject("nessie-s3minio"))
testFixturesApi(nessieProject("nessie-minio-testcontainer"))

testFixturesRuntimeOnly(libs.logback.classic)

@@ -21,6 +21,7 @@
import java.util.concurrent.ForkJoinTask;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -45,10 +46,12 @@ public class ITIcebergS3Files extends AbstractFiles {
void setUp(@Minio MinioAccess minio) {
this.minio = minio;
this.baseUri = minio.s3BucketUri(BUCKET_URI_PREFIX);
Configuration config = new Configuration();
minio.hadoopConfig().forEach(config::set);
this.s3 =
IcebergFiles.builder()
.properties(minio.icebergProperties())
.hadoopConfiguration(minio.hadoopConfiguration())
.hadoopConfiguration(config)
.build();
}

2 changes: 1 addition & 1 deletion gc/gc-iceberg-inttest/build.gradle.kts
@@ -43,7 +43,7 @@ dependencies {
implementation(nessieProject("nessie-gc-iceberg-mock"))
implementation(nessieProject("nessie-gc-iceberg-files"))
implementation(nessieProject("nessie-object-storage-mock"))
implementation(nessieProject("nessie-s3minio"))
implementation(nessieProject("nessie-minio-testcontainer"))

implementation(platform(libs.jackson.bom))

@@ -260,10 +260,10 @@ private IcebergTable icebergTableFromLastCommit(String branch) throws NessieNotF
}

protected Set<URI> allFiles(IcebergFiles icebergFiles) throws NessieFileIOException {
try (Stream<FileReference> list = icebergFiles.listRecursively(s3BucketUri())) {
try (Stream<FileReference> list = icebergFiles.listRecursively(bucketUri())) {
return list.map(FileReference::absolutePath).collect(Collectors.toCollection(TreeSet::new));
}
}

protected abstract URI s3BucketUri();
protected abstract URI bucketUri();
}
@@ -19,75 +19,51 @@
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.azure.AzureProperties;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.extension.ExtendWith;
import org.projectnessie.gc.iceberg.files.IcebergFiles;
import org.projectnessie.testing.azurite.AzuriteContainer;
import org.projectnessie.testing.azurite.Azurite;
import org.projectnessie.testing.azurite.AzuriteAccess;
import org.projectnessie.testing.azurite.AzuriteExtension;

@Disabled(
"Iceberg-azure cannot use Azurite (emulator), as it does not allow setting a shared-secret (user/pass)")
// org.apache.iceberg.azure.AzureProperties.applyClientConfiguration only allows SAS and default,
// but not UsernamePasswordCredential, although even Hadoop-Azure would work with it.
@Disabled("Needs an Iceberg release with https://github.com/apache/iceberg/pull/10045")
@ExtendWith(AzuriteExtension.class)
public class ITSparkIcebergNessieAzure extends AbstractITSparkIcebergNessieObjectStorage {

private static AzuriteContainer azuriteContainer;
public static final String BUCKET_URI = "/my/prefix";

@BeforeAll
static void startAzurite() {
azuriteContainer = new AzuriteContainer();
azuriteContainer.start();
}

@AfterAll
static void stopAzurite() {
azuriteContainer.stop();
}

@BeforeEach
void createStorageContainer() {
azuriteContainer.createStorageContainer();
}

@AfterEach
void deleteStorageContainer() {
azuriteContainer.deleteStorageContainer();
}
private static @Azurite AzuriteAccess azuriteAccess;

@Override
protected String warehouseURI() {
return azuriteContainer.location("");
return azuriteAccess.location(BUCKET_URI);
}

@Override
protected Map<String, String> sparkHadoop() {
return azuriteContainer.hadoopConfig();
return azuriteAccess.hadoopConfig();
}

@Override
protected Map<String, String> nessieParams() {
Map<String, String> r = new HashMap<>(super.nessieParams());
r.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.azure.adlsv2.ADLSFileIO");
r.put(AzureProperties.ADLS_CONNECTION_STRING_PREFIX, azuriteContainer.endpoint());
r.putAll(azuriteAccess.icebergProperties());
return r;
}

@Override
IcebergFiles icebergFiles() {
Map<String, String> props = new HashMap<>();

Configuration conf = new Configuration();
azuriteAccess.hadoopConfig().forEach(conf::set);

return IcebergFiles.builder().properties(props).hadoopConfiguration(conf).build();
return IcebergFiles.builder()
.properties(azuriteAccess.icebergProperties())
.hadoopConfiguration(conf)
.build();
}

@Override
protected URI s3BucketUri() {
return URI.create(azuriteContainer.location(""));
protected URI bucketUri() {
return URI.create(azuriteAccess.location(BUCKET_URI));
}
}
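The AzuriteExtension that fills the static @Azurite AzuriteAccess field above lives in the azurite-container module and is not visible in this diff. Below is a minimal sketch of the injection mechanism such a JUnit 5 extension can use; startAzurite() is a hypothetical placeholder, not the actual implementation.

    import java.lang.reflect.Field;
    import org.junit.jupiter.api.extension.BeforeAllCallback;
    import org.junit.jupiter.api.extension.ExtensionContext;
    import org.projectnessie.testing.azurite.Azurite;
    import org.projectnessie.testing.azurite.AzuriteAccess;

    // Sketch: start Azurite once per test class, then assign the access facade to
    // every static field annotated with @Azurite.
    public class AzuriteInjectionSketch implements BeforeAllCallback {
      @Override
      public void beforeAll(ExtensionContext context) throws Exception {
        AzuriteAccess access = startAzurite(); // hypothetical placeholder
        for (Field field : context.getRequiredTestClass().getDeclaredFields()) {
          if (field.isAnnotationPresent(Azurite.class)
              && AzuriteAccess.class.isAssignableFrom(field.getType())) {
            field.setAccessible(true);
            field.set(null, access); // static field: no target instance needed
          }
        }
      }

      private static AzuriteAccess startAzurite() {
        throw new UnsupportedOperationException("placeholder for container startup");
      }
    }

The real extension also resolves @Azurite-annotated method parameters (compare the @Minio parameter in ITIcebergS3Files above), which JUnit 5 supports via ParameterResolver.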
@@ -15,70 +15,53 @@
*/
package org.projectnessie.gc.iceberg.inttest;

import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Storage;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.extension.ExtendWith;
import org.projectnessie.gc.iceberg.files.IcebergFiles;
import org.projectnessie.testing.gcs.GCSContainer;
import org.projectnessie.testing.gcs.Gcs;
import org.projectnessie.testing.gcs.GcsAccess;
import org.projectnessie.testing.gcs.GcsExtension;

@ExtendWith(GcsExtension.class)
public class ITSparkIcebergNessieGCP extends AbstractITSparkIcebergNessieObjectStorage {

private static GCSContainer gcsContainer;
private static Storage gcsService;
public static final String BUCKET_URI = "/my/prefix";

@BeforeAll
static void setupGcs() {
gcsContainer = new GCSContainer();
gcsContainer.start();

gcsService = gcsContainer.newStorage();
}

@AfterAll
static void stopGcs() throws Exception {
gcsService.close();
}
private static @Gcs GcsAccess gcsAccess;

@Override
protected String warehouseURI() {
return gcsContainer.bucketUri();
return gcsAccess.bucketUri(BUCKET_URI).toString();
}

@Override
protected Map<String, String> sparkHadoop() {
return gcsContainer.hadoopConfig();
return gcsAccess.hadoopConfig();
}

@Override
protected Map<String, String> nessieParams() {
Map<String, String> r = new HashMap<>(super.nessieParams());
r.putAll(gcsContainer.icebergProperties());
r.putAll(gcsAccess.icebergProperties());
return r;
}

@AfterEach
void purgeGcs() {
gcsService.list(gcsContainer.bucket()).iterateAll().forEach(Blob::delete);
}

@Override
IcebergFiles icebergFiles() {
Map<String, String> props = gcsContainer.icebergProperties();

Configuration conf = new Configuration();
gcsContainer.hadoopConfig().forEach(conf::set);
gcsAccess.hadoopConfig().forEach(conf::set);

return IcebergFiles.builder().properties(props).hadoopConfiguration(conf).build();
return IcebergFiles.builder()
.properties(gcsAccess.icebergProperties())
.hadoopConfiguration(conf)
.build();
}

@Override
protected URI s3BucketUri() {
return URI.create(gcsContainer.bucketUri());
protected URI bucketUri() {
return gcsAccess.bucketUri(BUCKET_URI);
}
}
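The GcsContainer "external-url" fix from the commit message is in a file not shown above. The underlying issue: fake-gcs-server builds download URLs from the address it believes it is reachable at, which by default is the in-container address, so host-side clients cannot follow them. Below is a sketch of the idea against a raw testcontainer, using fake-gcs-server's /_internal/config endpoint; the actual GcsContainer code may differ.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import org.testcontainers.containers.GenericContainer;

    public class FakeGcsExternalUrlSketch {
      public static void main(String[] args) throws Exception {
        try (GenericContainer<?> gcs =
            new GenericContainer<>("fsouza/fake-gcs-server")
                .withExposedPorts(4443)
                .withCommand("-scheme", "http")) {
          gcs.start();
          // The host-visible address differs from the in-container one.
          String externalUrl = "http://" + gcs.getHost() + ":" + gcs.getMappedPort(4443);
          // Tell the emulator its external URL so the download URLs it hands out
          // are reachable from the host.
          HttpRequest update =
              HttpRequest.newBuilder()
                  .uri(URI.create(externalUrl + "/_internal/config"))
                  .PUT(HttpRequest.BodyPublishers.ofString(
                      "{\"externalUrl\":\"" + externalUrl + "\"}"))
                  .build();
          HttpClient.newHttpClient().send(update, HttpResponse.BodyHandlers.discarding());
        }
      }
    }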
@@ -19,6 +19,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -47,12 +48,7 @@ protected String warehouseURI() {

@Override
protected Map<String, String> sparkHadoop() {
Map<String, String> r = new HashMap<>();
r.put("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
r.put("fs.s3a.access.key", minio.accessKey());
r.put("fs.s3a.secret.key", minio.secretKey());
r.put("fs.s3a.endpoint", minio.s3endpoint());
return r;
return minio.hadoopConfig();
}

@Override
@@ -94,14 +90,16 @@ void purgeS3() {

@Override
IcebergFiles icebergFiles() {
Configuration conf = new Configuration();
minio.hadoopConfig().forEach(conf::set);
return IcebergFiles.builder()
.properties(minio.icebergProperties())
.hadoopConfiguration(minio.hadoopConfiguration())
.hadoopConfiguration(conf)
.build();
}

@Override
protected URI s3BucketUri() {
protected URI bucketUri() {
return minio.s3BucketUri(S3_BUCKET_URI);
}
}
2 changes: 1 addition & 1 deletion gc/gc-tool-inttest/build.gradle.kts
@@ -56,7 +56,7 @@ dependencies {
intTestImplementation(
nessieProject("nessie-spark-extensions-basetests_${sparkScala.scalaMajorVersion}")
)
intTestImplementation(nessieProject("nessie-s3minio"))
intTestImplementation(nessieProject("nessie-minio-testcontainer"))

intTestImplementation("org.apache.spark:spark-sql_${sparkScala.scalaMajorVersion}") {
forSpark(sparkScala.sparkVersion)
2 changes: 1 addition & 1 deletion gradle/projects.main.properties
@@ -29,7 +29,7 @@ nessie-quarkus=servers/quarkus-server
nessie-quarkus-tests=servers/quarkus-tests
nessie-rest-common=servers/rest-common
nessie-rest-services=servers/rest-services
nessie-s3minio=testing/s3minio
nessie-minio-testcontainer=testing/minio-container
nessie-multi-env-test-engine=testing/multi-env-test-engine
nessie-services=servers/services
nessie-services-config=servers/services-config
6 changes: 6 additions & 0 deletions testing/azurite-container/build.gradle.kts
@@ -33,6 +33,12 @@ dependencies {

compileOnly(libs.immutables.value.annotations)
annotationProcessor(libs.immutables.value.processor)

compileOnly(platform(libs.junit.bom))
compileOnly("org.junit.jupiter:junit-jupiter-api")

intTestImplementation(libs.bundles.junit.testing)
intTestRuntimeOnly(libs.logback.classic)
}

tasks.withType(Test::class.java).configureEach {
@@ -0,0 +1,78 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.testing.azurite;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.azure.storage.file.datalake.DataLakeFileClient;
import com.azure.storage.file.datalake.DataLakeServiceClient;
import java.io.InputStream;
import java.io.OutputStream;
import org.assertj.core.api.SoftAssertions;
import org.assertj.core.api.junit.jupiter.InjectSoftAssertions;
import org.assertj.core.api.junit.jupiter.SoftAssertionsExtension;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;

@ExtendWith({AzuriteExtension.class, SoftAssertionsExtension.class})
public class ITAzuriteExtension {
@InjectSoftAssertions private SoftAssertions soft;

@Test
public void smokeTest(
@Azurite(account = "myaccesskey", secret = "mysecretkey", storageContainer = "mybucket")
AzuriteAccess azurite)
throws Exception {
soft.assertThat(azurite.endpointHostPort()).isNotEmpty();
soft.assertThat(azurite.endpoint()).isNotEmpty().startsWith("http");

soft.assertThat(azurite.storageContainer()).isNotEmpty().isEqualTo("mybucket");

soft.assertThat(azurite.icebergProperties())
.containsEntry("io-impl", "org.apache.iceberg.azure.adlsv2.ADLSFileIO")
.containsEntry("adls.connection-string." + azurite.accountFq(), azurite.endpoint())
.containsEntry("adls.auth.shared-key.account.name", azurite.account())
.containsEntry("adls.auth.shared-key.account.key", azurite.secretBase64());

soft.assertThat(azurite.hadoopConfig())
.isNotNull()
.containsEntry("fs.azure.impl", "org.apache.hadoop.fs.azure.AzureNativeFileSystemStore")
.containsEntry("fs.AbstractFileSystem.azure.impl", "org.apache.hadoop.fs.azurebfs.Abfs")
.containsEntry("fs.azure.always.use.https", "false")
.containsEntry("fs.azure.abfs.endpoint", azurite.endpointHostPort())
.containsEntry("fs.azure.account.auth.type", "SharedKey")
.containsEntry("fs.azure.storage.emulator.account.name", azurite.account())
.containsEntry("fs.azure.account.key." + azurite.accountFq(), azurite.secretBase64());

DataLakeServiceClient client = azurite.serviceClient();
byte[] data = "hello world".getBytes(UTF_8);
String key = "some-key";

soft.assertThat(azurite.location("some-key"))
.isEqualTo("abfs://" + azurite.storageContainer() + "@" + azurite.accountFq() + "/" + key);

DataLakeFileClient fileClient =
client.getFileSystemClient(azurite.storageContainer()).getFileClient(key);
try (OutputStream output = fileClient.getOutputStream()) {
output.write(data);
}

try (InputStream input = fileClient.openInputStream().getInputStream()) {
soft.assertThat(input.readAllBytes()).isEqualTo(data);
}
}
}