From 05383171b6944472c847dfbf8646fe76a2bd2247 Mon Sep 17 00:00:00 2001 From: Marc Cenac Date: Thu, 19 Dec 2024 12:11:16 -0600 Subject: [PATCH] Add fix for applyClientConfiguration from ADLSFileIO --- .../org/apache/iceberg/azure/AzureProperties.java | 8 +++----- .../apache/iceberg/azure/adlsv2/ADLSFileIO.java | 2 +- .../apache/iceberg/azure/adlsv2/ADLSLocation.java | 9 ++++++++- .../apache/iceberg/azure/AzurePropertiesTest.java | 10 ++++------ .../iceberg/azure/adlsv2/ADLSLocationTest.java | 14 ++++++++++++++ 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java index a7f9885a4726..8313000ab35e 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java +++ b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java @@ -81,11 +81,9 @@ public Optional adlsWriteBlockSize() { * Applies configuration to the {@link DataLakeFileSystemClientBuilder} to provide the endpoint * and credentials required to create an instance of the client. * - *

The default endpoint is constructed in the form {@code - * https://{account}.dfs.core.windows.net} and default credentials are provided via the {@link - * com.azure.identity.DefaultAzureCredential}. + *

Default credentials are provided via the {@link com.azure.identity.DefaultAzureCredential}. * - * @param account the service account name + * @param account the service account key (e.g. a hostname or storage account key to get values) * @param builder the builder instance */ public void applyClientConfiguration(String account, DataLakeFileSystemClientBuilder builder) { @@ -104,7 +102,7 @@ public void applyClientConfiguration(String account, DataLakeFileSystemClientBui if (connectionString != null && !connectionString.isEmpty()) { builder.endpoint(connectionString); } else { - builder.endpoint("https://" + account + ".dfs.core.windows.net"); + builder.endpoint("https://" + account); } } } diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java index 0bfce9d6055b..e1bf21f69dc8 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSFileIO.java @@ -111,7 +111,7 @@ DataLakeFileSystemClient client(ADLSLocation location) { new DataLakeFileSystemClientBuilder().httpClient(HTTP); location.container().ifPresent(clientBuilder::fileSystemName); - azureProperties.applyClientConfiguration(location.storageAccount(), clientBuilder); + azureProperties.applyClientConfiguration(location.host(), clientBuilder); return clientBuilder.buildClient(); } diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index fb91c4cb3233..6fe2629774dc 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -49,6 +49,7 @@ class ADLSLocation { private final String storageAccount; private final String container; private final String path; + private final String host; /** * Creates a new ADLSLocation from a fully qualified URI. @@ -66,10 +67,11 @@ class ADLSLocation { String[] parts = authority.split("@", -1); if (parts.length > 1) { this.container = parts[0]; - String host = parts[1]; + this.host = parts[1]; this.storageAccount = host.split("\\.", -1)[0]; } else { this.container = null; + this.host = authority; this.storageAccount = authority.split("\\.", -1)[0]; } @@ -91,4 +93,9 @@ public Optional container() { public String path() { return path; } + + /** Returns ADLS host. */ + public String host() { + return host; + } } diff --git a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java index 4f032d7ab125..6b8287c44e58 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java @@ -97,13 +97,11 @@ public void testNoSasToken() { @Test public void testWithConnectionString() { AzureProperties props = - new AzureProperties( - ImmutableMap.of( - "adls.connection-string.account1", "https://account1.dfs.core.usgovcloudapi.net")); + new AzureProperties(ImmutableMap.of("adls.connection-string.account1", "http://endpoint")); DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("https://account1.dfs.core.usgovcloudapi.net"); + verify(clientBuilder).endpoint("http://endpoint"); } @Test @@ -113,7 +111,7 @@ public void testNoMatchingConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("https://account1.dfs.core.windows.net"); + verify(clientBuilder).endpoint("https://account1"); } @Test @@ -122,7 +120,7 @@ public void testNoConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account", clientBuilder); - verify(clientBuilder).endpoint("https://account.dfs.core.windows.net"); + verify(clientBuilder).endpoint("https://account"); } @Test diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 10b5e1877cca..10bc6184ddc0 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -24,6 +24,7 @@ import org.apache.iceberg.exceptions.ValidationException; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; public class ADLSLocationTest { @@ -100,4 +101,17 @@ public void testQuestionMarkInFileName(String path) { ADLSLocation location = new ADLSLocation(fullPath); assertThat(location.path()).contains(path); } + + @ParameterizedTest + @CsvSource({ + "abfs://container@account.dfs.core.windows.net/file.txt, account.dfs.core.windows.net", + "abfs://container@account.dfs.core.usgovcloudapi.net/file.txt, account.dfs.core.usgovcloudapi.net", + "wasb://container@account.blob.core.windows.net/file.txt, account.blob.core.windows.net", + "abfs://account.dfs.core.windows.net/path, account.dfs.core.windows.net", + "wasb://account.blob.core.windows.net/path, account.blob.core.windows.net" + }) + void testHost(String path, String expectedHost) { + ADLSLocation location = new ADLSLocation(path); + assertThat(location.host()).contains(expectedHost); + } }