added list_backups
m-kovalsky committed Sep 3, 2024
1 parent cf93d70 commit 66dd308
Showing 9 changed files with 143 additions and 29 deletions.
2 changes: 2 additions & 0 deletions src/sempy_labs/__init__.py
@@ -26,6 +26,7 @@
backup_semantic_model,
restore_semantic_model,
copy_semantic_model_backup_file,
list_backups,
)

# from sempy_labs._connections import (
@@ -144,6 +145,7 @@
"deprovision_workspace_identity",
"list_dataflows",
"copy_semantic_model_backup_file",
"list_backups",
"backup_semantic_model",
"restore_semantic_model",
"delete_custom_pool",
117 changes: 94 additions & 23 deletions src/sempy_labs/_clear_cache.py
@@ -2,6 +2,9 @@
from sempy_labs._helper_functions import resolve_dataset_id, is_default_semantic_model
from typing import Optional
import sempy_labs._icons as icons
from sempy._utils._log import log
import pandas as pd
from sempy.fabric.exceptions import FabricHTTPException


def clear_cache(dataset: str, workspace: Optional[str] = None):
@@ -41,6 +44,7 @@ def clear_cache(dataset: str, workspace: Optional[str] = None):
)


@log
def backup_semantic_model(
dataset: str,
file_path: str,
@@ -92,6 +96,7 @@ def backup_semantic_model(
)


@log
def restore_semantic_model(
dataset: str,
file_path: str,
@@ -152,14 +157,13 @@ def restore_semantic_model(
)


@log
def copy_semantic_model_backup_file(
source_workspace: str,
target_workspace: str,
source_file_name: str,
target_file_name: str,
storage_account_url: str,
key_vault_uri: str,
key_vault_account_key: str,
storage_account: str,
source_file_system: Optional[str] = "power-bi-backup",
target_file_system: Optional[str] = "power-bi-backup",
):
@@ -168,8 +172,14 @@ def copy_semantic_model_backup_file(
Requirements:
1. Must have an Azure storage account and connect it to both the source and target workspace.
2. Must have an Azure Key Vault.
3. Must save the Account Key from the Azure storage account as a secret within Azure Key Vault.
2. Must be a 'Storage Blob Data Contributor' for the storage account.
Steps:
1. Navigate to the storage account within the Azure Portal
2. Navigate to 'Access Control (IAM)'
3. Click '+ Add' -> Add Role Assignment
4. Search for 'Storage Blob Data Contributor', select it and click 'Next'
5. Add yourself as a member, click 'Next'
6. Click 'Review + assign'
Parameters
----------
@@ -181,26 +191,17 @@
The name of the source backup file (i.e. MyModel.abf).
target_file_name : str
The name of the target backup file (i.e. MyModel.abf).
storage_account_url : str
The URL of the storage account. To find this, navigate to the storage account within the Azure Portal. Within 'Endpoints', see the value for the 'Primary Endpoint'.
key_vault_uri : str
The URI of the Azure Key Vault account.
key_vault_account_key : str
The key vault secret name which contains the account key of the Azure storage account.
storage_account : str
The name of the storage account.
source_file_system : str, default="power-bi-backup"
The container in which the source backup file is located.
target_file_system : str, default="power-bi-backup"
The container in which the target backup file will be saved.
"""

from notebookutils import mssparkutils
from azure.storage.filedatalake import DataLakeServiceClient
from sempy_labs._helper_functions import get_adls_client

account_key = mssparkutils.credentials.getSecret(
key_vault_uri, key_vault_account_key
)

suffix = '.abf'
suffix = ".abf"

if not source_file_name.endswith(suffix):
source_file_name = f"{source_file_name}{suffix}"
@@ -209,14 +210,13 @@

source_path = f"/{source_workspace}/{source_file_name}"
target_path = f"/{target_workspace}/{target_file_name}"
service_client = DataLakeServiceClient(
account_url=storage_account_url, credential=account_key
)

source_file_system_client = service_client.get_file_system_client(
client = get_adls_client(account_name=storage_account)

source_file_system_client = client.get_file_system_client(
file_system=source_file_system
)
destination_file_system_client = service_client.get_file_system_client(
destination_file_system_client = client.get_file_system_client(
file_system=target_file_system
)

@@ -238,3 +238,74 @@
print(
f"{icons.green_dot} The backup file of the '{source_file_name}' semantic model from the '{source_workspace}' workspace has been copied as the '{target_file_name}' semantic model backup file within the '{target_workspace}'."
)


@log
def list_backups(workspace: Optional[str] = None) -> pd.DataFrame:

"""
Shows a list of backup files contained within a workspace's ADLS Gen2 storage account.
Requirement: An ADLS Gen2 storage account must be `connected to the workspace <https://learn.microsoft.com/power-bi/transform-model/dataflows/dataflows-azure-data-lake-storage-integration#connect-to-an-azure-data-lake-gen-2-at-a-workspace-level>`_.
Parameters
----------
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
or if no lakehouse attached, resolves to the workspace of the notebook.
Returns
-------
pandas.DataFrame
A pandas dataframe showing a list of backup files contained within a workspace's ADLS Gen2 storage account.
"""

from sempy_labs._helper_functions import get_adls_client

client = fabric.PowerBIRestClient()
workspace = fabric.resolve_workspace_name(workspace)
workspace_id = fabric.resolve_workspace_id(workspace)
response = client.get(
f"/v1.0/myorg/resources?resourceType=StorageAccount&folderObjectId={workspace_id}"
)

if response.status_code != 200:
raise FabricHTTPException(response)

v = response.json().get("value", [])
if not v:
raise ValueError(f"{icons.red_dot} A storage account is not associated with the '{workspace}' workspace.")
storage_account = v[0]["resourceName"]

df = pd.DataFrame(
columns=[
"Storage Account Name",
"File Path",
"File Size",
"Creation Time",
"Last Modified",
"Expiry Time",
"Encryption Scope",
]
)

onelake = get_adls_client(storage_account)
fs = onelake.get_file_system_client("power-bi-backup")

for x in list(fs.get_paths()):
if not x.is_directory:
new_data = {
"Storage Account Name": storage_account,
"File Path": x.name,
"File Size": x.content_length,
"Creation Time": x.creation_time,
"Last Modified": x.last_modified,
"Expiry Time": x.expiry_time,
"Encryption Scope": x.encryption_scope,
}

df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

df["File Size"] = df["File Size"].astype(int)

return df
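
A minimal usage sketch for the new function, assuming the workspace already has an ADLS Gen2 storage account connected; the workspace name is a placeholder:

```python
import sempy_labs as labs

# Returns a pandas DataFrame with one row per backup file found in the
# workspace's "power-bi-backup" container.
backups = labs.list_backups(workspace="Sales Prod")  # placeholder name
print(backups[["File Path", "File Size", "Last Modified"]])
```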
29 changes: 29 additions & 0 deletions src/sempy_labs/_helper_functions.py
@@ -12,6 +12,9 @@
import sempy_labs._icons as icons
from sempy.fabric.exceptions import FabricHTTPException
import urllib.parse
from notebookutils import mssparkutils
from azure.core.credentials import TokenCredential, AccessToken
from azure.storage.filedatalake import DataLakeServiceClient


def create_abfss_path(
@@ -868,3 +871,29 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:
deployment_pipeline_id = dfP_filt["Deployment Pipeline Id"].iloc[0]

return deployment_pipeline_id


class FabricTokenCredential(TokenCredential):

def get_token(
self,
scopes: str,
claims: Optional[str] = None,
tenant_id: Optional[str] = None,
enable_cae: Optional[bool] = False,
**kwargs: any,
) -> AccessToken:
token = mssparkutils.credentials.getToken(scopes)
access_token = AccessToken(token, 0)

return access_token


def get_adls_client(account_name) -> DataLakeServiceClient:
account_url = f"https://{account_name}.dfs.core.windows.net"

service_client = DataLakeServiceClient(
account_url, credential=FabricTokenCredential()
)

return service_client
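
A short sketch of how the new internal helper might be exercised; the storage-account name is a placeholder, and the helper is used inside the library rather than exposed as public API:

```python
from sempy_labs._helper_functions import get_adls_client

# FabricTokenCredential obtains a token via mssparkutils, so this only works
# inside a Fabric notebook session.
client = get_adls_client("mystorageaccount")  # placeholder account name
fs = client.get_file_system_client(file_system="power-bi-backup")
for path in fs.get_paths():
    if not path.is_directory:
        print(path.name, path.content_length)
```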
4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py
@@ -53,7 +53,9 @@ def migrate_calc_tables_to_lakehouse(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py
@@ -49,7 +49,9 @@ def migrate_calc_tables_to_semantic_model(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py
@@ -44,7 +44,9 @@ def migrate_model_objects_to_semantic_model(
import System

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py
@@ -48,7 +48,9 @@ def migrate_tables_columns_to_semantic_model(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migration_validation.py
@@ -38,7 +38,9 @@ def migration_validation(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)
if new_dataset_workspace is None:
4 changes: 3 additions & 1 deletion src/sempy_labs/report/_report_rebind.py
@@ -102,7 +102,9 @@ def report_rebind_all(
from sempy_labs._list_functions import list_reports_using_semantic_model

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

dataset_workspace = fabric.resolve_workspace_name(dataset_workspace)

