-
Notifications
You must be signed in to change notification settings - Fork 136
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Nessie-GC: Support Azure Blob Storage (experimental) (#7715)
Adds experimental support for Azure Blob Storage. Unfortunately, the integration cannot be tested in CI: although an emulator (Azurite) is available, Iceberg does not allow the plain username/password credentials that the emulator requires.
- Loading branch information
Showing
11 changed files
with
259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
...test/src/intTest/java/org/projectnessie/gc/iceberg/inttest/ITSparkIcebergNessieAzure.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Copyright (C) 2022 Dremio | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.projectnessie.gc.iceberg.inttest; | ||
|
||
import java.net.URI; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.iceberg.CatalogProperties; | ||
import org.apache.iceberg.azure.AzureProperties; | ||
import org.junit.jupiter.api.AfterAll; | ||
import org.junit.jupiter.api.AfterEach; | ||
import org.junit.jupiter.api.BeforeAll; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Disabled; | ||
import org.projectnessie.gc.iceberg.files.IcebergFiles; | ||
import org.projectnessie.testing.azurite.AzuriteContainer; | ||
|
||
@Disabled( | ||
"Iceberg-azure cannot use Azurite (emulator), as it does not allow setting a shared-secret (user/pass)") | ||
// org.apache.iceberg.azure.AzureProperties.applyClientConfiguration only allows SAS and default, | ||
// but not | ||
// UsernamePasswordCredential, although even Hadoop-Azure would work with it. | ||
public class ITSparkIcebergNessieAzure extends AbstractITSparkIcebergNessieObjectStorage { | ||
|
||
private static AzuriteContainer azuriteContainer; | ||
|
||
@BeforeAll | ||
static void startAzurite() { | ||
azuriteContainer = new AzuriteContainer(); | ||
azuriteContainer.start(); | ||
} | ||
|
||
@AfterAll | ||
static void stopAzurite() { | ||
azuriteContainer.stop(); | ||
} | ||
|
||
@BeforeEach | ||
void createStorageContainer() { | ||
azuriteContainer.createStorageContainer(); | ||
} | ||
|
||
@AfterEach | ||
void deleteStorageContainer() { | ||
azuriteContainer.deleteStorageContainer(); | ||
} | ||
|
||
@Override | ||
protected String warehouseURI() { | ||
return azuriteContainer.location(""); | ||
} | ||
|
||
@Override | ||
protected Map<String, String> sparkHadoop() { | ||
return azuriteContainer.hadoopConfig(); | ||
} | ||
|
||
@Override | ||
protected Map<String, String> nessieParams() { | ||
Map<String, String> r = new HashMap<>(super.nessieParams()); | ||
r.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.azure.adlsv2.ADLSFileIO"); | ||
r.put(AzureProperties.ADLS_CONNECTION_STRING_PREFIX, azuriteContainer.endpoint()); | ||
return r; | ||
} | ||
|
||
@Override | ||
IcebergFiles icebergFiles() { | ||
Map<String, String> props = new HashMap<>(); | ||
|
||
Configuration conf = new Configuration(); | ||
|
||
return IcebergFiles.builder().properties(props).hadoopConfiguration(conf).build(); | ||
} | ||
|
||
@Override | ||
protected URI s3BucketUri() { | ||
return URI.create(azuriteContainer.location("")); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Copyright (C) 2023 Dremio | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
// Build script of the Azurite testcontainer module.
plugins { id("nessie-conventions-iceberg") }

extra["maven.name"] = "Nessie - Azurite testcontainer"

dependencies {
  implementation(libs.slf4j.api)

  // Testcontainers, version aligned via its BOM; part of this module's API.
  api(platform(libs.testcontainers.bom))
  api("org.testcontainers:testcontainers")

  // Azure SDK, version aligned via its BOM; part of this module's API.
  api(platform(libs.azuresdk.bom))
  api("com.azure:azure-storage-file-datalake")
  api("com.azure:azure-identity")

  // Annotation-only dependencies, needed at compile time only.
  compileOnly(libs.jakarta.annotation.api)
  compileOnly(libs.findbugs.jsr305)
  compileOnly(libs.errorprone.annotations)

  compileOnly(libs.immutables.value.annotations)
  annotationProcessor(libs.immutables.value.processor)
}

// Every Test task runs with fixed Azure credentials in its environment.
// NOTE(review): the values look like they are meant to match the Azurite test account/key -
// confirm against the container setup.
tasks.withType<Test>().configureEach {
  environment("AZURE_USERNAME", "account")
  environment("AZURE_PASSWORD", "key")
}
101 changes: 101 additions & 0 deletions
101
...g/azurite-container/src/main/java/org/projectnessie/testing/azurite/AzuriteContainer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* | ||
* Copyright (C) 2022 Dremio | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.projectnessie.testing.azurite; | ||
|
||
import com.azure.storage.common.StorageSharedKeyCredential; | ||
import com.azure.storage.file.datalake.DataLakeServiceClient; | ||
import com.azure.storage.file.datalake.DataLakeServiceClientBuilder; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Base64; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import org.testcontainers.containers.GenericContainer; | ||
import org.testcontainers.containers.wait.strategy.LogMessageWaitStrategy; | ||
|
||
public class AzuriteContainer extends GenericContainer<AzuriteContainer> { | ||
|
||
private static final int DEFAULT_PORT = 10000; // default blob service port | ||
private static final String DEFAULT_IMAGE = "mcr.microsoft.com/azure-storage/azurite"; | ||
private static final String DEFAULT_TAG = "latest"; | ||
private static final String LOG_WAIT_REGEX = | ||
"Azurite Blob service is successfully listening at .*"; | ||
|
||
public static final String ACCOUNT = "account"; | ||
public static final String ACCOUNT_FQ = ACCOUNT + ".dfs.core.windows.net"; | ||
public static final String KEY = "key"; | ||
public static final String KEY_BASE64 = | ||
new String(Base64.getEncoder().encode(KEY.getBytes(StandardCharsets.UTF_8))); | ||
; | ||
public static final String STORAGE_CONTAINER = "container"; | ||
|
||
public AzuriteContainer() { | ||
this(DEFAULT_IMAGE + ":" + DEFAULT_TAG); | ||
} | ||
|
||
public AzuriteContainer(String image) { | ||
super(image == null ? DEFAULT_IMAGE + ":" + DEFAULT_TAG : image); | ||
this.addExposedPort(DEFAULT_PORT); | ||
this.addEnv("AZURITE_ACCOUNTS", ACCOUNT + ":" + KEY_BASE64); | ||
this.setWaitStrategy(new LogMessageWaitStrategy().withRegEx(LOG_WAIT_REGEX)); | ||
} | ||
|
||
public void createStorageContainer() { | ||
serviceClient().createFileSystem(STORAGE_CONTAINER); | ||
} | ||
|
||
public void deleteStorageContainer() { | ||
serviceClient().deleteFileSystem(STORAGE_CONTAINER); | ||
} | ||
|
||
public DataLakeServiceClient serviceClient() { | ||
return new DataLakeServiceClientBuilder() | ||
.endpoint(endpoint()) | ||
.credential(credential()) | ||
.buildClient(); | ||
} | ||
|
||
public String location(String path) { | ||
return String.format("abfs://%s@%s/%s", STORAGE_CONTAINER, ACCOUNT_FQ, path); | ||
} | ||
|
||
public String endpoint() { | ||
return String.format("http://%s/%s", endpointHostPort(), ACCOUNT); | ||
} | ||
|
||
public String endpointHostPort() { | ||
return String.format("%s:%d", getHost(), getMappedPort(DEFAULT_PORT)); | ||
} | ||
|
||
public StorageSharedKeyCredential credential() { | ||
return new StorageSharedKeyCredential(ACCOUNT, KEY_BASE64); | ||
} | ||
|
||
public Map<String, String> hadoopConfig() { | ||
Map<String, String> r = new HashMap<>(); | ||
|
||
r.put("fs.azure.impl", "org.apache.hadoop.fs.azure.AzureNativeFileSystemStore"); | ||
r.put("fs.AbstractFileSystem.azure.impl", "org.apache.hadoop.fs.azurebfs.Abfs"); | ||
|
||
r.put("fs.azure.always.use.https", "false"); | ||
r.put("fs.azure.abfs.endpoint", endpointHostPort()); | ||
|
||
r.put("fs.azure.account.auth.type", "SharedKey"); | ||
r.put("fs.azure.storage.emulator.account.name", ACCOUNT_FQ); | ||
r.put("fs.azure.account.key." + ACCOUNT_FQ, KEY_BASE64); | ||
|
||
return r; | ||
} | ||
} |