added list_backups
m-kovalsky committed Sep 3, 2024
1 parent cf93d70 commit 66dd308
Showing 9 changed files with 143 additions and 29 deletions.
2 changes: 2 additions & 0 deletions src/sempy_labs/__init__.py
@@ -26,6 +26,7 @@
backup_semantic_model,
restore_semantic_model,
copy_semantic_model_backup_file,
list_backups,
)

# from sempy_labs._connections import (
@@ -144,6 +145,7 @@
"deprovision_workspace_identity",
"list_dataflows",
"copy_semantic_model_backup_file",
"list_backups",
"backup_semantic_model",
"restore_semantic_model",
"delete_custom_pool",
117 changes: 94 additions & 23 deletions src/sempy_labs/_clear_cache.py
@@ -2,6 +2,9 @@
from sempy_labs._helper_functions import resolve_dataset_id, is_default_semantic_model
from typing import Optional
import sempy_labs._icons as icons
from sempy._utils._log import log
import pandas as pd
from sempy.fabric.exceptions import FabricHTTPException


def clear_cache(dataset: str, workspace: Optional[str] = None):
@@ -41,6 +44,7 @@ def clear_cache(dataset: str, workspace: Optional[str] = None):
)


@log
def backup_semantic_model(
dataset: str,
file_path: str,
@@ -92,6 +96,7 @@ def backup_semantic_model(
)


@log
def restore_semantic_model(
dataset: str,
file_path: str,
@@ -152,14 +157,13 @@ def restore_semantic_model(
)


@log
def copy_semantic_model_backup_file(
source_workspace: str,
target_workspace: str,
source_file_name: str,
target_file_name: str,
storage_account_url: str,
key_vault_uri: str,
key_vault_account_key: str,
storage_account: str,
source_file_system: Optional[str] = "power-bi-backup",
target_file_system: Optional[str] = "power-bi-backup",
):
@@ -168,8 +172,14 @@ def copy_semantic_model_backup_file(
Requirements:
1. Must have an Azure storage account and connect it to both the source and target workspace.
2. Must have an Azure Key Vault.
3. Must save the Account Key from the Azure storage account as a secret within Azure Key Vault.
2. Must be a 'Storage Blob Data Contributor' for the storage account.
Steps:
1. Navigate to the storage account within the Azure Portal
2. Navigate to 'Access Control (IAM)'
3. Click '+ Add' -> Add Role Assignment
4. Search for 'Storage Blob Data Contributor', select it and click 'Next'
5. Add yourself as a member, click 'Next'
6. Click 'Review + assign'
Parameters
----------
@@ -181,26 +191,17 @@
The name of the source backup file (i.e. MyModel.abf).
target_file_name : str
The name of the target backup file (i.e. MyModel.abf).
storage_account_url : str
The URL of the storage account. To find this, navigate to the storage account within the Azure Portal. Within 'Endpoints', see the value for the 'Primary Endpoint'.
key_vault_uri : str
The URI of the Azure Key Vault account.
key_vault_account_key : str
The key vault secret name which contains the account key of the Azure storage account.
storage_account : str
The name of the storage account.
source_file_system : str, default="power-bi-backup"
The container in which the source backup file is located.
target_file_system : str, default="power-bi-backup"
The container in which the target backup file will be saved.
"""

from notebookutils import mssparkutils
from azure.storage.filedatalake import DataLakeServiceClient
from sempy_labs._helper_functions import get_adls_client

account_key = mssparkutils.credentials.getSecret(
key_vault_uri, key_vault_account_key
)

suffix = '.abf'
suffix = ".abf"

if not source_file_name.endswith(suffix):
source_file_name = f"{source_file_name}{suffix}"
@@ -209,14 +210,13 @@

source_path = f"/{source_workspace}/{source_file_name}"
target_path = f"/{target_workspace}/{target_file_name}"
service_client = DataLakeServiceClient(
account_url=storage_account_url, credential=account_key
)

source_file_system_client = service_client.get_file_system_client(
client = get_adls_client(account_name=storage_account)

source_file_system_client = client.get_file_system_client(
file_system=source_file_system
)
destination_file_system_client = service_client.get_file_system_client(
destination_file_system_client = client.get_file_system_client(
file_system=target_file_system
)

@@ -238,3 +238,74 @@
print(
f"{icons.green_dot} The backup file of the '{source_file_name}' semantic model from the '{source_workspace}' workspace has been copied as the '{target_file_name}' semantic model backup file within the '{target_workspace}'."
)


@log
def list_backups(workspace: Optional[str] = None) -> pd.DataFrame:

"""
Shows a list of backup files contained within a workspace's ADLS Gen2 storage account.
Requirement: An ADLS Gen2 storage account must be `connected to the workspace <https://learn.microsoft.com/power-bi/transform-model/dataflows/dataflows-azure-data-lake-storage-integration#connect-to-an-azure-data-lake-gen-2-at-a-workspace-level>`_.
Parameters
----------
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
or if no lakehouse attached, resolves to the workspace of the notebook.
Returns
-------
pandas.DataFrame
A pandas dataframe showing a list of backup files contained within a workspace's ADLS Gen2 storage account.
"""

from sempy_labs._helper_functions import get_adls_client

client = fabric.PowerBIRestClient()
workspace = fabric.resolve_workspace_name(workspace)
workspace_id = fabric.resolve_workspace_id(workspace)
response = client.get(
f"/v1.0/myorg/resources?resourceType=StorageAccount&folderObjectId={workspace_id}"
)

if response.status_code != 200:
raise FabricHTTPException(response)

v = response.json().get("value", [])
if not v:
raise ValueError(f"{icons.red_dot} A storage account is not associated with the '{workspace}' workspace.")
storage_account = v[0]["resourceName"]

df = pd.DataFrame(
columns=[
"Storage Account Name",
"File Path",
"File Size",
"Creation Time",
"Last Modified",
"Expiry Time",
"Encryption Scope",
]
)

onelake = get_adls_client(storage_account)
fs = onelake.get_file_system_client("power-bi-backup")

for x in list(fs.get_paths()):
if not x.is_directory:
new_data = {
"Storage Account Name": storage_account,
"File Path": x.name,
"File Size": x.content_length,
"Creation Time": x.creation_time,
"Last Modified": x.last_modified,
"Expiry Time": x.expiry_time,
"Encryption Scope": x.encryption_scope,
}

df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

df["File Size"] = df["File Size"].astype(int)

return df
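
A minimal usage sketch for the new function, assuming the workspace already has an ADLS Gen2 storage account connected; the workspace name is a placeholder:

```python
import sempy_labs as labs

# Returns a pandas DataFrame with one row per backup file found in the
# workspace's "power-bi-backup" container.
backups = labs.list_backups(workspace="Sales Prod")  # placeholder name
print(backups[["File Path", "File Size", "Last Modified"]])
```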
29 changes: 29 additions & 0 deletions src/sempy_labs/_helper_functions.py
@@ -12,6 +12,9 @@
import sempy_labs._icons as icons
from sempy.fabric.exceptions import FabricHTTPException
import urllib.parse
from notebookutils import mssparkutils
from azure.core.credentials import TokenCredential, AccessToken
from azure.storage.filedatalake import DataLakeServiceClient


def create_abfss_path(
@@ -868,3 +871,29 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:
deployment_pipeline_id = dfP_filt["Deployment Pipeline Id"].iloc[0]

return deployment_pipeline_id


class FabricTokenCredential(TokenCredential):

def get_token(
self,
scopes: str,
claims: Optional[str] = None,
tenant_id: Optional[str] = None,
enable_cae: Optional[bool] = False,
**kwargs: any,
) -> AccessToken:
token = mssparkutils.credentials.getToken(scopes)
access_token = AccessToken(token, 0)

return access_token


def get_adls_client(account_name) -> DataLakeServiceClient:
account_url = f"https://{account_name}.dfs.core.windows.net"

service_client = DataLakeServiceClient(
account_url, credential=FabricTokenCredential()
)

return service_client
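
A short sketch of how the new internal helper might be exercised; the storage-account name is a placeholder, and the helper is used inside the library rather than exposed as public API:

```python
from sempy_labs._helper_functions import get_adls_client

# FabricTokenCredential obtains a token via mssparkutils, so this only works
# inside a Fabric notebook session.
client = get_adls_client("mystorageaccount")  # placeholder account name
fs = client.get_file_system_client(file_system="power-bi-backup")
for path in fs.get_paths():
    if not path.is_directory:
        print(path.name, path.content_length)
```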
4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py
@@ -53,7 +53,9 @@ def migrate_calc_tables_to_lakehouse(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py
@@ -49,7 +49,9 @@ def migrate_calc_tables_to_semantic_model(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py
@@ -44,7 +44,9 @@ def migrate_model_objects_to_semantic_model(
import System

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py
@@ -48,7 +48,9 @@ def migrate_tables_columns_to_semantic_model(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)

4 changes: 3 additions & 1 deletion src/sempy_labs/migration/_migration_validation.py
@@ -38,7 +38,9 @@ def migration_validation(
"""

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

workspace = fabric.resolve_workspace_name(workspace)
if new_dataset_workspace is None:
4 changes: 3 additions & 1 deletion src/sempy_labs/report/_report_rebind.py
@@ -102,7 +102,9 @@ def report_rebind_all(
from sempy_labs._list_functions import list_reports_using_semantic_model

if dataset == new_dataset:
raise ValueError(f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values.")
raise ValueError(
f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values."
)

dataset_workspace = fabric.resolve_workspace_name(dataset_workspace)

