From 66dd3085383eb60d4de391191c84364a57e0ada3 Mon Sep 17 00:00:00 2001
From: Michael
Date: Tue, 3 Sep 2024 11:43:50 +0300
Subject: [PATCH] added list_backups

---
 src/sempy_labs/__init__.py                    |   2 +
 src/sempy_labs/_clear_cache.py                | 117 ++++++++++++++----
 src/sempy_labs/_helper_functions.py           |  29 +++++
 .../_migrate_calctables_to_lakehouse.py       |   4 +-
 .../_migrate_calctables_to_semantic_model.py  |   4 +-
 ...migrate_model_objects_to_semantic_model.py |   4 +-
 ...igrate_tables_columns_to_semantic_model.py |   4 +-
 .../migration/_migration_validation.py        |   4 +-
 src/sempy_labs/report/_report_rebind.py       |   4 +-
 9 files changed, 143 insertions(+), 29 deletions(-)

diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py
index a8146826..46407303 100644
--- a/src/sempy_labs/__init__.py
+++ b/src/sempy_labs/__init__.py
@@ -26,6 +26,7 @@
     backup_semantic_model,
     restore_semantic_model,
     copy_semantic_model_backup_file,
+    list_backups,
 )
 
 # from sempy_labs._connections import (
@@ -144,6 +145,7 @@
     "deprovision_workspace_identity",
     "list_dataflows",
     "copy_semantic_model_backup_file",
+    "list_backups",
     "backup_semantic_model",
     "restore_semantic_model",
     "delete_custom_pool",
diff --git a/src/sempy_labs/_clear_cache.py b/src/sempy_labs/_clear_cache.py
index 4e593b61..30e7ca04 100644
--- a/src/sempy_labs/_clear_cache.py
+++ b/src/sempy_labs/_clear_cache.py
@@ -2,6 +2,9 @@
 from sempy_labs._helper_functions import resolve_dataset_id, is_default_semantic_model
 from typing import Optional
 import sempy_labs._icons as icons
+from sempy._utils._log import log
+import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException
 
 
 def clear_cache(dataset: str, workspace: Optional[str] = None):
@@ -41,6 +44,7 @@ def clear_cache(dataset: str, workspace: Optional[str] = None):
     )
 
 
+@log
 def backup_semantic_model(
     dataset: str,
     file_path: str,
@@ -92,6 +96,7 @@ def backup_semantic_model(
     )
 
 
+@log
 def restore_semantic_model(
     dataset: str,
     file_path: str,
@@ -152,14 +157,13 @@ def restore_semantic_model(
     )
 
 
+@log
 def copy_semantic_model_backup_file(
     source_workspace: str,
     target_workspace: str,
     source_file_name: str,
     target_file_name: str,
-    storage_account_url: str,
-    key_vault_uri: str,
-    key_vault_account_key: str,
+    storage_account: str,
     source_file_system: Optional[str] = "power-bi-backup",
     target_file_system: Optional[str] = "power-bi-backup",
 ):
@@ -168,8 +172,14 @@
 
     Requirements:
         1. Must have an Azure storage account and connect it to both the source and target workspace.
-        2. Must have an Azure Key Vault.
-        3. Must save the Account Key from the Azure storage account as a secret within Azure Key Vault.
+        2. Must be a 'Storage Blob Data Contributor' for the storage account.
+            Steps:
+            1. Navigate to the storage account within the Azure Portal
+            2. Navigate to 'Access Control (IAM)'
+            3. Click '+ Add' -> Add Role Assignment
+            4. Search for 'Storage Blob Data Contributor', select it and click 'Next'
+            5. Add yourself as a member, click 'Next'
+            6. Click 'Review + assign'
 
     Parameters
     ----------
     source_workspace : str
         The workspace name of the source semantic model backup file.
     target_workspace : str
         The workspace name of the target semantic model backup file destination.
     source_file_name : str
         The name of the source backup file (i.e. MyModel.abf).
     target_file_name : str
         The name of the target backup file (i.e. MyModel.abf).
-    storage_account_url : str
-        The URL of the storage account. To find this, navigate to the storage account within the Azure Portal. Within 'Endpoints', see the value for the 'Primary Endpoint'.
-    key_vault_uri : str
-        The URI of the Azure Key Vault account.
-    key_vault_account_key : str
-        The key vault secret name which contains the account key of the Azure storage account.
+    storage_account : str
+        The name of the storage account.
     source_file_system : str, default="power-bi-backup"
         The container in which the source backup file is located.
     target_file_system : str, default="power-bi-backup"
         The container in which the target backup file will be saved.
     """
-    from notebookutils import mssparkutils
-    from azure.storage.filedatalake import DataLakeServiceClient
+    from sempy_labs._helper_functions import get_adls_client
 
-    account_key = mssparkutils.credentials.getSecret(
-        key_vault_uri, key_vault_account_key
-    )
-
-    suffix = '.abf'
+    suffix = ".abf"
 
     if not source_file_name.endswith(suffix):
         source_file_name = f"{source_file_name}{suffix}"
@@ -209,14 +210,13 @@
     source_path = f"/{source_workspace}/{source_file_name}"
     target_path = f"/{target_workspace}/{target_file_name}"
 
-    service_client = DataLakeServiceClient(
-        account_url=storage_account_url, credential=account_key
-    )
-    source_file_system_client = service_client.get_file_system_client(
+    client = get_adls_client(account_name=storage_account)
+
+    source_file_system_client = client.get_file_system_client(
         file_system=source_file_system
     )
-    destination_file_system_client = service_client.get_file_system_client(
+    destination_file_system_client = client.get_file_system_client(
         file_system=target_file_system
     )
 
@@ -238,3 +238,74 @@
     print(
         f"{icons.green_dot} The backup file of the '{source_file_name}' semantic model from the '{source_workspace}' workspace has been copied as the '{target_file_name}' semantic model backup file within the '{target_workspace}'."
     )
+
+
+@log
+def list_backups(workspace: Optional[str] = None) -> pd.DataFrame:
+
+    """
+    Shows a list of backup files contained within a workspace's ADLS Gen2 storage account.
+    Requirement: An ADLS Gen2 storage account must be connected to the workspace.
+
+    Parameters
+    ----------
+    workspace : str, default=None
+        The Fabric workspace name.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of backup files contained within a workspace's ADLS Gen2 storage account.
+ """ + + from sempy_labs._helper_functions import get_adls_client + + client = fabric.PowerBIRestClient() + workspace = fabric.resolve_workspace_name(workspace) + workspace_id = fabric.resolve_workspace_id(workspace) + response = client.get( + f"/v1.0/myorg/resources?resourceType=StorageAccount&folderObjectId={workspace_id}" + ) + + if response.status_code != 200: + raise FabricHTTPException(response) + + v = response.json().get("value", []) + if not v: + raise ValueError(f"{icons.red_dot} A storage account is not associated with the '{workspace}' workspace.") + storage_account = v[0]["resourceName"] + + df = pd.DataFrame( + columns=[ + "Storage Account Name", + "File Path", + "File Size", + "Creation Time", + "Last Modified", + "Expiry Time", + "Encryption Scope", + ] + ) + + onelake = get_adls_client(storage_account) + fs = onelake.get_file_system_client("power-bi-backup") + + for x in list(fs.get_paths()): + if not x.is_directory: + new_data = { + "Storage Account Name": storage_account, + "File Path": x.name, + "File Size": x.content_length, + "Creation Time": x.creation_time, + "Last Modified": x.last_modified, + "Expiry Time": x.expiry_time, + "Encryption Scope": x.encryption_scope, + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["File Size"] = df["File Size"].astype(int) + + return df diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 123e3a37..06bcfbf4 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -12,6 +12,9 @@ import sempy_labs._icons as icons from sempy.fabric.exceptions import FabricHTTPException import urllib.parse +from notebookutils import mssparkutils +from azure.core.credentials import TokenCredential, AccessToken +from azure.storage.filedatalake import DataLakeServiceClient def create_abfss_path( @@ -868,3 +871,29 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID: deployment_pipeline_id = dfP_filt["Deployment Pipeline Id"].iloc[0] return deployment_pipeline_id + + +class FabricTokenCredential(TokenCredential): + + def get_token( + self, + scopes: str, + claims: Optional[str] = None, + tenant_id: Optional[str] = None, + enable_cae: Optional[bool] = False, + **kwargs: any, + ) -> AccessToken: + token = mssparkutils.credentials.getToken(scopes) + access_token = AccessToken(token, 0) + + return access_token + + +def get_adls_client(account_name) -> DataLakeServiceClient: + account_url = f"https://{account_name}.dfs.core.windows.net" + + service_client = DataLakeServiceClient( + account_url, credential=FabricTokenCredential() + ) + + return service_client diff --git a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py index 6c60a6b2..0c73f621 100644 --- a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py +++ b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -53,7 +53,9 @@ def migrate_calc_tables_to_lakehouse( """ if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." 
+ ) workspace = fabric.resolve_workspace_name(workspace) diff --git a/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py index 7fce0cbc..fbe35ba9 100644 --- a/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py +++ b/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py @@ -49,7 +49,9 @@ def migrate_calc_tables_to_semantic_model( """ if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." + ) workspace = fabric.resolve_workspace_name(workspace) diff --git a/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py b/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py index 2f789940..176973b6 100644 --- a/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +++ b/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py @@ -44,7 +44,9 @@ def migrate_model_objects_to_semantic_model( import System if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." + ) workspace = fabric.resolve_workspace_name(workspace) diff --git a/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py index 42e2b902..edcf03a7 100644 --- a/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +++ b/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py @@ -48,7 +48,9 @@ def migrate_tables_columns_to_semantic_model( """ if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." + ) workspace = fabric.resolve_workspace_name(workspace) diff --git a/src/sempy_labs/migration/_migration_validation.py b/src/sempy_labs/migration/_migration_validation.py index df31a671..eb1f047e 100644 --- a/src/sempy_labs/migration/_migration_validation.py +++ b/src/sempy_labs/migration/_migration_validation.py @@ -38,7 +38,9 @@ def migration_validation( """ if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." 
+ ) workspace = fabric.resolve_workspace_name(workspace) if new_dataset_workspace is None: diff --git a/src/sempy_labs/report/_report_rebind.py b/src/sempy_labs/report/_report_rebind.py index 9a0f39ec..e6e7d23a 100644 --- a/src/sempy_labs/report/_report_rebind.py +++ b/src/sempy_labs/report/_report_rebind.py @@ -102,7 +102,9 @@ def report_rebind_all( from sempy_labs._list_functions import list_reports_using_semantic_model if dataset == new_dataset: - raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.") + raise ValueError( + f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." + ) dataset_workspace = fabric.resolve_workspace_name(dataset_workspace)