From 9c441bb5f9098d484343ea93b878cb016e07ddc1 Mon Sep 17 00:00:00 2001 From: Pang Wu <104795337+pang-wu@users.noreply.github.com> Date: Mon, 15 Apr 2024 15:56:59 -0700 Subject: [PATCH] deltalake _delta_lake.py: Allow Glue catalog cross account access (#2113) This PR passes the target account ID (`table.catalog_id`) as `CatalogId` in the Glue `get_table` call, so users can access a catalog in another account. When no catalog ID is set, the call is unchanged. Tested in our dev environment. --- daft/io/_delta_lake.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/daft/io/_delta_lake.py b/daft/io/_delta_lake.py index 009649c89a..28e6a6be18 100644 --- a/daft/io/_delta_lake.py +++ b/daft/io/_delta_lake.py @@ -71,7 +71,14 @@ def read_delta_lake( aws_secret_access_key=s3_config.access_key, aws_session_token=s3_config.session_token, ) - glue_table = glue.get_table(DatabaseName=table.database_name, Name=table.table_name) + if table.catalog_id is not None: + # Allow cross account access, table.catalog_id should be the target account id + glue_table = glue.get_table( + CatalogId=table.catalog_id, DatabaseName=table.database_name, Name=table.table_name + ) + else: + glue_table = glue.get_table(DatabaseName=table.database_name, Name=table.table_name) + # TODO(Clark): Fetch more than just the table URI from Glue Data Catalog. table_uri = glue_table["Table"]["StorageDescriptor"]["Location"] elif table.catalog == DataCatalogType.UNITY: