From 6eb754bba8f8dc04715c921d5865ff74af9e74e1 Mon Sep 17 00:00:00 2001 From: Pieterjan Spoelders Date: Wed, 8 May 2024 06:55:33 -0400 Subject: [PATCH] add regex for onelake + test --- .../exasol/cloudetl/bucket/AzureAbfsBucket.scala | 5 +++++ .../cloudetl/bucket/AzureAbfsBucketTest.scala | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/main/scala/com/exasol/cloudetl/bucket/AzureAbfsBucket.scala b/src/main/scala/com/exasol/cloudetl/bucket/AzureAbfsBucket.scala index f2329e6f..6195a533 100644 --- a/src/main/scala/com/exasol/cloudetl/bucket/AzureAbfsBucket.scala +++ b/src/main/scala/com/exasol/cloudetl/bucket/AzureAbfsBucket.scala @@ -65,12 +65,17 @@ final case class AzureAbfsBucket(path: String, params: StorageProperties) extend // Intentionally copy-paste, duplicate count: 2. Please, refactor when // it reaches 3+. + // Fabric / OneLake paths use .dfs.fabric.microsoft.com; see AZURE_ABFS_ONELAKE_PATH_REGEX. private[this] final val AZURE_ABFS_PATH_REGEX: Regex = """abfss?://(.*)@([^.]+).dfs.core.windows.net/(.*)$""".r + private[this] final val AZURE_ABFS_ONELAKE_PATH_REGEX: Regex = + """abfss?://(.*)@([^.]+)\.dfs\.fabric\.microsoft\.com/(.*)$""".r private[this] def regexParsePath(path: String): AccountAndContainer = path match { case AZURE_ABFS_PATH_REGEX(containerName, accountName, _) => AccountAndContainer(accountName, containerName) + case AZURE_ABFS_ONELAKE_PATH_REGEX(containerName, accountName, _) => + AccountAndContainer(accountName, containerName) case _ => throw new BucketValidationException( ExaError diff --git a/src/test/scala/com/exasol/cloudetl/bucket/AzureAbfsBucketTest.scala b/src/test/scala/com/exasol/cloudetl/bucket/AzureAbfsBucketTest.scala index d3a08ec7..c5d67c9a 100644 --- a/src/test/scala/com/exasol/cloudetl/bucket/AzureAbfsBucketTest.scala +++ b/src/test/scala/com/exasol/cloudetl/bucket/AzureAbfsBucketTest.scala @@ -36,6 +36,21 @@ class AzureAbfsBucketTest extends AbstractBucketTest { assert(thrown.getMessage().startsWith("E-CSE-20")) 
assert(thrown.getMessage().contains(s"path '$path' scheme is not valid.")) } + // https://github.com/MicrosoftDocs/fabric-docs/blob/main/docs/onelake/onelake-access-api.md + // Regular ABFS path: "abfss://container1@account1.dfs.core.windows.net/data/" + // For OneLake the account name is always "onelake". + // The container name is your workspace name. + // + // From the docs: + // -> abfs[s]://<workspace>@onelake.dfs.fabric.microsoft.com/<item>.<itemtype>/<path>/<fileName> + test("apply returns AzureAbfsBucket for Azure OneLake path") { + val path = "abfss://workspacename@onelake.dfs.fabric.microsoft.com/item.itemtype/path/filename" + val exaMetadata = mockConnectionInfo("", "AZURE_SECRET_KEY=secret") + properties = defaultProperties ++ Map(PATH -> path, "CONNECTION_NAME" -> "connection_info") + + val bucket = getBucket(properties, exaMetadata) + assert(bucket.isInstanceOf[AzureAbfsBucket]) + } test("apply throws if no connection name is provided") { properties = defaultProperties