Skip to content

Commit

Permalink
Nessie-GC: Support Azure Blob Storage (experimental) (#7715)
Browse files Browse the repository at this point in the history
Adds experimental support for Azure Blob Storage. Unfortunately, the integration cannot be tested in CI: although an emulator (Azurite) is available, Iceberg does not allow using a plain username/password to authenticate against it.
  • Loading branch information
snazy authored Nov 15, 2023
1 parent 48c0d27 commit 5b78998
Show file tree
Hide file tree
Showing 11 changed files with 259 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ as necessary. Empty sections will not end in the release notes.
### New Features

- Nessie-GC: Support Google Cloud Storage (GCS) (experimental)
- Nessie-GC: Support Azure Blob Storage (experimental)

### Changes

Expand Down
1 change: 1 addition & 0 deletions bom/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ extra["maven.name"] = "Nessie - Bill of Materials (BOM)"
dependencies {
constraints {
api(rootProject)
api(project(":nessie-azurite-testcontainer"))
api(project(":nessie-client"))
api(project(":nessie-client-testextension"))
api(project(":nessie-combined-cs"))
Expand Down
1 change: 1 addition & 0 deletions gc/gc-iceberg-files/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dependencies {
implementation(libs.iceberg.core)
implementation(libs.iceberg.aws)
implementation(libs.iceberg.gcp)
implementation(libs.iceberg.azure)

compileOnly(libs.errorprone.annotations)
compileOnly(libs.immutables.value.annotations)
Expand Down
5 changes: 5 additions & 0 deletions gc/gc-iceberg-inttest/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies {
implementation(libs.iceberg.core)
implementation(libs.iceberg.aws)
implementation(libs.iceberg.gcp)
implementation(libs.iceberg.azure)

compileOnly(libs.errorprone.annotations)
compileOnly(libs.microprofile.openapi)
Expand Down Expand Up @@ -79,6 +80,7 @@ dependencies {
intTestRuntimeOnly(libs.iceberg.hive.metastore)
intTestRuntimeOnly(libs.iceberg.aws)
intTestRuntimeOnly(libs.iceberg.gcp)
intTestRuntimeOnly(libs.iceberg.azure)

intTestRuntimeOnly(libs.hadoop.client)
intTestRuntimeOnly(libs.hadoop.aws)
Expand All @@ -97,6 +99,9 @@ dependencies {
intTestRuntimeOnly(libs.google.cloud.nio)
intTestRuntimeOnly(libs.google.cloud.gcs.connector)

intTestImplementation(project(":nessie-azurite-testcontainer"))
intTestRuntimeOnly(libs.hadoop.azure)

intTestCompileOnly(libs.immutables.builder)
intTestCompileOnly(libs.immutables.value.annotations)
intTestAnnotationProcessor(libs.immutables.value.processor)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.gc.iceberg.inttest;

import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.azure.AzureProperties;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.projectnessie.gc.iceberg.files.IcebergFiles;
import org.projectnessie.testing.azurite.AzuriteContainer;

/**
 * Runs the Nessie GC Spark/Iceberg integration scenario against an Azurite (Azure storage
 * emulator) container.
 *
 * <p>Currently disabled: {@code org.apache.iceberg.azure.AzureProperties.applyClientConfiguration}
 * only allows SAS tokens and the default credential, but not {@code UsernamePasswordCredential},
 * although even Hadoop-Azure would work with it.
 */
@Disabled(
    "Iceberg-azure cannot use Azurite (emulator), as it does not allow setting a shared-secret (user/pass)")
public class ITSparkIcebergNessieAzure extends AbstractITSparkIcebergNessieObjectStorage {

  /** Shared Azurite instance, started once for all tests of this class. */
  private static AzuriteContainer azuriteContainer;

  @BeforeAll
  static void startAzurite() {
    azuriteContainer = new AzuriteContainer();
    azuriteContainer.start();
  }

  @AfterAll
  static void stopAzurite() {
    // The field stays null if the container could not even be constructed; do not mask that
    // original failure with a NullPointerException during tear-down.
    if (azuriteContainer != null) {
      azuriteContainer.stop();
    }
  }

  /** Creates a fresh storage container (file system) for every test. */
  @BeforeEach
  void createStorageContainer() {
    azuriteContainer.createStorageContainer();
  }

  /** Drops the storage container created by {@link #createStorageContainer()}. */
  @AfterEach
  void deleteStorageContainer() {
    azuriteContainer.deleteStorageContainer();
  }

  /** Returns the {@code abfs://} root location of the Azurite storage container. */
  @Override
  protected String warehouseURI() {
    return azuriteContainer.location("");
  }

  /** Returns the Hadoop settings that point Spark at the Azurite container. */
  @Override
  protected Map<String, String> sparkHadoop() {
    return azuriteContainer.hadoopConfig();
  }

  /**
   * Returns the Nessie catalog parameters of the superclass, extended with the ADLS {@code FileIO}
   * implementation and the Azurite endpoint.
   */
  @Override
  protected Map<String, String> nessieParams() {
    Map<String, String> r = new HashMap<>(super.nessieParams());
    r.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.azure.adlsv2.ADLSFileIO");
    r.putAll(adlsProperties());
    return r;
  }

  /**
   * Builds the {@link IcebergFiles} instance used by the GC run, configured with the same Azurite
   * endpoint and Hadoop settings that the Spark side of the test uses.
   */
  @Override
  IcebergFiles icebergFiles() {
    Map<String, String> props = adlsProperties();

    Configuration conf = new Configuration();
    azuriteContainer.hadoopConfig().forEach(conf::set);

    return IcebergFiles.builder().properties(props).hadoopConfiguration(conf).build();
  }

  /** Returns the root URI of the object store; the method name is inherited from the base class. */
  @Override
  protected URI s3BucketUri() {
    return URI.create(azuriteContainer.location(""));
  }

  /**
   * Iceberg ADLS properties pointing at Azurite.
   *
   * <p>{@link AzureProperties#ADLS_CONNECTION_STRING_PREFIX} is only a key <em>prefix</em>: the
   * connection string is looked up per storage account, so the account host used in the {@code
   * abfs://} locations has to be appended to it.
   */
  private static Map<String, String> adlsProperties() {
    Map<String, String> props = new HashMap<>();
    props.put(
        AzureProperties.ADLS_CONNECTION_STRING_PREFIX + AzuriteContainer.ACCOUNT_FQ,
        azuriteContainer.endpoint());
    return props;
  }
}
6 changes: 6 additions & 0 deletions gc/gc-tool-inttest/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ dependencies {
intTestRuntimeOnly(libs.iceberg.hive.metastore)
intTestRuntimeOnly(libs.iceberg.aws)
intTestRuntimeOnly(libs.iceberg.gcp)
intTestRuntimeOnly(libs.iceberg.azure)
intTestRuntimeOnly(libs.iceberg.nessie)
intTestRuntimeOnly(libs.iceberg.core)
intTestRuntimeOnly(
Expand All @@ -95,6 +96,11 @@ dependencies {
intTestRuntimeOnly("com.google.cloud:google-cloud-storage")
intTestRuntimeOnly(libs.google.cloud.nio)

intTestRuntimeOnly(platform(libs.azuresdk.bom))
intTestRuntimeOnly("com.azure:azure-storage-file-datalake")
intTestRuntimeOnly("com.azure:azure-identity")
intTestRuntimeOnly(libs.hadoop.azure)

intTestCompileOnly("com.fasterxml.jackson.core:jackson-annotations")
intTestCompileOnly(libs.microprofile.openapi)

Expand Down
6 changes: 6 additions & 0 deletions gc/gc-tool/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ dependencies {
runtimeOnly(libs.iceberg.hive.metastore)
runtimeOnly(libs.iceberg.aws)
runtimeOnly(libs.iceberg.gcp)
runtimeOnly(libs.iceberg.azure)

// hadoop-common brings Jackson in ancient versions, pulling in the Jackson BOM to avoid that
implementation(platform(libs.jackson.bom))
Expand Down Expand Up @@ -69,6 +70,11 @@ dependencies {
runtimeOnly(libs.google.cloud.nio)
runtimeOnly(libs.google.cloud.gcs.connector)

implementation(platform(libs.azuresdk.bom))
runtimeOnly("com.azure:azure-storage-file-datalake")
runtimeOnly("com.azure:azure-identity")
runtimeOnly(libs.hadoop.azure)

implementation(libs.picocli)
annotationProcessor(libs.picocli.codegen)

Expand Down
3 changes: 3 additions & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ assertj-core = { module = "org.assertj:assertj-core", version = "3.24.2" }
avro = { module = "org.apache.avro:avro", version = "1.11.3" }
awaitility = { module = "org.awaitility:awaitility", version = "4.2.0" }
awssdk-bom = { module = "software.amazon.awssdk:bom", version = "2.21.23" }
azuresdk-bom = { module = "com.azure:azure-sdk-bom", version = "1.2.18" }
bouncycastle-bcpkix = { module = "org.bouncycastle:bcpkix-jdk15on", version.ref = "bouncycastle" }
bouncycastle-bcprov = { module = "org.bouncycastle:bcprov-jdk15on", version.ref = "bouncycastle" }
cassandra-driver-bom = { module = "com.datastax.oss:java-driver-bom", version = "4.17.0" }
Expand All @@ -65,11 +66,13 @@ google-java-format = { module = "com.google.googlejavaformat:google-java-format"
guava = { module = "com.google.guava:guava", version = "32.1.3-jre" }
h2 = { module = "com.h2database:h2", version = "2.2.224" }
hadoop-aws = { module = "org.apache.hadoop:hadoop-aws", version.ref = "hadoop" }
hadoop-azure = { module = "org.apache.hadoop:hadoop-azure", version.ref = "hadoop" }
hadoop-client = { module = "org.apache.hadoop:hadoop-client", version.ref = "hadoop" }
hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" }
hibernate-validator-cdi = { module = "org.hibernate:hibernate-validator-cdi", version = "6.2.5.Final" }
iceberg-api = { module = "org.apache.iceberg:iceberg-api", version.ref = "iceberg" }
iceberg-aws = { module = "org.apache.iceberg:iceberg-aws", version.ref = "iceberg" }
iceberg-azure = { module = "org.apache.iceberg:iceberg-azure", version.ref = "iceberg" }
iceberg-bundled-guava = { module = "org.apache.iceberg:iceberg-bundled-guava", version.ref = "iceberg" }
iceberg-common = { module = "org.apache.iceberg:iceberg-common", version.ref = "iceberg" }
iceberg-core = { module = "org.apache.iceberg:iceberg-core", version.ref = "iceberg" }
Expand Down
1 change: 1 addition & 0 deletions gradle/projects.main.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
nessie-azurite-testcontainer=testing/azurite-container
nessie-bom=bom
nessie-client=api/client
nessie-client-testextension=api/client-testextension
Expand Down
41 changes: 41 additions & 0 deletions testing/azurite-container/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2023 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Build script of the Azurite testcontainer helper module.
plugins { id("nessie-conventions-iceberg") }

extra["maven.name"] = "Nessie - Azurite testcontainer"

dependencies {
implementation(libs.slf4j.api)
// Testcontainers is declared as `api`, versions come from the Testcontainers BOM.
api(platform(libs.testcontainers.bom))
api("org.testcontainers:testcontainers")

// Azure SDK artifacts are declared as `api` as well, versions come from the Azure SDK BOM.
api(platform(libs.azuresdk.bom))
api("com.azure:azure-storage-file-datalake")
api("com.azure:azure-identity")

// Annotation-only libraries, needed at compile time only.
compileOnly(libs.jakarta.annotation.api)
compileOnly(libs.findbugs.jsr305)
compileOnly(libs.errorprone.annotations)

compileOnly(libs.immutables.value.annotations)
annotationProcessor(libs.immutables.value.processor)
}

// All Test tasks of this module run with the Azure credential environment variables set.
tasks.withType<Test>().configureEach {
  environment("AZURE_USERNAME", "account")
  environment("AZURE_PASSWORD", "key")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.testing.azurite;

import com.azure.storage.common.StorageSharedKeyCredential;
import com.azure.storage.file.datalake.DataLakeServiceClient;
import com.azure.storage.file.datalake.DataLakeServiceClientBuilder;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.wait.strategy.LogMessageWaitStrategy;

/**
 * Testcontainers wrapper around the Azurite image, exposing its blob service port and
 * preconfigured with a single storage account ({@link #ACCOUNT}) and key ({@link #KEY_BASE64}).
 *
 * <p>Besides starting the container, this class offers helpers to create and delete the test
 * storage container ({@link #STORAGE_CONTAINER}) and to produce the endpoint, location and Hadoop
 * settings that clients need to talk to the running instance.
 */
public class AzuriteContainer extends GenericContainer<AzuriteContainer> {

  private static final int DEFAULT_PORT = 10000; // default blob service port
  private static final String DEFAULT_IMAGE = "mcr.microsoft.com/azure-storage/azurite";
  private static final String DEFAULT_TAG = "latest";
  private static final String DEFAULT_IMAGE_NAME = DEFAULT_IMAGE + ":" + DEFAULT_TAG;
  private static final String LOG_WAIT_REGEX =
      "Azurite Blob service is successfully listening at .*";

  /** Name of the storage account configured in the container. */
  public static final String ACCOUNT = "account";

  /** Account host name as used in the authority part of {@code abfs://} locations. */
  public static final String ACCOUNT_FQ = ACCOUNT + ".dfs.core.windows.net";

  /** Plain-text account key. */
  public static final String KEY = "key";

  /** Base64 encoded form of {@link #KEY}, the form passed to Azurite and to clients. */
  public static final String KEY_BASE64 =
      Base64.getEncoder().encodeToString(KEY.getBytes(StandardCharsets.UTF_8));

  /** Name of the storage container (file system) managed by the helper methods. */
  public static final String STORAGE_CONTAINER = "container";

  /** Creates a container using the default Azurite image and tag. */
  public AzuriteContainer() {
    this(DEFAULT_IMAGE_NAME);
  }

  /**
   * Creates a container using the given image.
   *
   * @param image full image name including the tag, or {@code null} for the default image
   */
  public AzuriteContainer(String image) {
    super(image == null ? DEFAULT_IMAGE_NAME : image);
    this.addExposedPort(DEFAULT_PORT);
    // Registers the test account and its key with Azurite.
    this.addEnv("AZURITE_ACCOUNTS", ACCOUNT + ":" + KEY_BASE64);
    // Wait for the blob service's "listening" log message.
    this.setWaitStrategy(new LogMessageWaitStrategy().withRegEx(LOG_WAIT_REGEX));
  }

  /** Creates the {@link #STORAGE_CONTAINER} file system in the running Azurite instance. */
  public void createStorageContainer() {
    serviceClient().createFileSystem(STORAGE_CONTAINER);
  }

  /** Deletes the {@link #STORAGE_CONTAINER} file system from the running Azurite instance. */
  public void deleteStorageContainer() {
    serviceClient().deleteFileSystem(STORAGE_CONTAINER);
  }

  /**
   * Builds a new Data Lake service client for {@link #endpoint()}, authenticated with {@link
   * #credential()}. A new client is built on every call.
   */
  public DataLakeServiceClient serviceClient() {
    return new DataLakeServiceClientBuilder()
        .endpoint(endpoint())
        .credential(credential())
        .buildClient();
  }

  /**
   * Returns the {@code abfs://} URI of the given path inside {@link #STORAGE_CONTAINER}.
   *
   * @param path path relative to the storage container root, may be empty
   */
  public String location(String path) {
    return String.format("abfs://%s@%s/%s", STORAGE_CONTAINER, ACCOUNT_FQ, path);
  }

  /** Returns the HTTP endpoint of the test account: {@code http://<host>:<port>/<account>}. */
  public String endpoint() {
    return String.format("http://%s/%s", endpointHostPort(), ACCOUNT);
  }

  /** Returns {@code <host>:<mapped port>} of the blob service of the running container. */
  public String endpointHostPort() {
    return String.format("%s:%d", getHost(), getMappedPort(DEFAULT_PORT));
  }

  /** Returns the shared-key credential for {@link #ACCOUNT}. */
  public StorageSharedKeyCredential credential() {
    return new StorageSharedKeyCredential(ACCOUNT, KEY_BASE64);
  }

  /**
   * Returns Hadoop configuration entries for accessing the running container: file system
   * implementations, the plain-HTTP endpoint and shared-key authentication for {@link
   * #ACCOUNT_FQ}.
   */
  public Map<String, String> hadoopConfig() {
    Map<String, String> r = new HashMap<>();

    r.put("fs.azure.impl", "org.apache.hadoop.fs.azure.AzureNativeFileSystemStore");
    r.put("fs.AbstractFileSystem.azure.impl", "org.apache.hadoop.fs.azurebfs.Abfs");

    r.put("fs.azure.always.use.https", "false");
    r.put("fs.azure.abfs.endpoint", endpointHostPort());

    r.put("fs.azure.account.auth.type", "SharedKey");
    r.put("fs.azure.storage.emulator.account.name", ACCOUNT_FQ);
    r.put("fs.azure.account.key." + ACCOUNT_FQ, KEY_BASE64);

    return r;
  }
}

0 comments on commit 5b78998

Please sign in to comment.