Commit
Merge branch 'm-kovalsky/improvecopybackupfile'
m-kovalsky committed Sep 5, 2024
2 parents d553d02 + 3593025 commit 590041a
Showing 3 changed files with 151 additions and 25 deletions.
4 changes: 4 additions & 0 deletions src/sempy_labs/__init__.py
@@ -26,6 +26,8 @@
    backup_semantic_model,
    restore_semantic_model,
    copy_semantic_model_backup_file,
+    list_backups,
+    list_storage_account_files,
)

# from sempy_labs._connections import (
@@ -146,6 +148,8 @@
    "deprovision_workspace_identity",
    "list_dataflows",
    "copy_semantic_model_backup_file",
+    "list_backups",
+    "list_storage_account_files",
    "backup_semantic_model",
    "restore_semantic_model",
    "delete_custom_pool",
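Aside (not part of the diff): with these exports merged, both new helpers should be importable from the package root. A minimal sketch, assuming the sempy-labs package is installed in a Fabric notebook:

# Hypothetical notebook usage once this commit is released
from sempy_labs import list_backups, list_storage_account_files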
143 changes: 118 additions & 25 deletions src/sempy_labs/_clear_cache.py
@@ -1,7 +1,10 @@
import sempy.fabric as fabric
-from sempy_labs._helper_functions import resolve_dataset_id, is_default_semantic_model
+from sempy_labs._helper_functions import resolve_dataset_id, is_default_semantic_model, get_adls_client
from typing import Optional
import sempy_labs._icons as icons
from sempy._utils._log import log
+import pandas as pd
+from sempy.fabric.exceptions import FabricHTTPException


def clear_cache(dataset: str, workspace: Optional[str] = None):
@@ -41,6 +44,7 @@ def clear_cache(dataset: str, workspace: Optional[str] = None):
    )


+@log
def backup_semantic_model(
    dataset: str,
    file_path: str,
@@ -92,6 +96,7 @@ def backup_semantic_model(
    )


+@log
def restore_semantic_model(
    dataset: str,
    file_path: str,
@@ -152,14 +157,13 @@ def restore_semantic_model(
    )


+@log
def copy_semantic_model_backup_file(
    source_workspace: str,
    target_workspace: str,
    source_file_name: str,
    target_file_name: str,
-    storage_account_url: str,
-    key_vault_uri: str,
-    key_vault_account_key: str,
+    storage_account: str,
    source_file_system: Optional[str] = "power-bi-backup",
    target_file_system: Optional[str] = "power-bi-backup",
):
@@ -168,8 +172,14 @@ def copy_semantic_model_backup_file(
    Requirements:
    1. Must have an Azure storage account and connect it to both the source and target workspaces.
-    2. Must have an Azure Key Vault.
-    3. Must save the Account Key from the Azure storage account as a secret within Azure Key Vault.
+    2. Must be a 'Storage Blob Data Contributor' for the storage account.
+        Steps:
+        1. Navigate to the storage account within the Azure Portal.
+        2. Navigate to 'Access Control (IAM)'.
+        3. Click '+ Add' -> Add Role Assignment.
+        4. Search for 'Storage Blob Data Contributor', select it and click 'Next'.
+        5. Add yourself as a member, click 'Next'.
+        6. Click 'Review + assign'.

    Parameters
    ----------
@@ -181,24 +191,13 @@ def copy_semantic_model_backup_file(
        The name of the source backup file (i.e. MyModel.abf).
    target_file_name : str
        The name of the target backup file (i.e. MyModel.abf).
-    storage_account_url : str
-        The URL of the storage account. To find this, navigate to the storage account within the Azure Portal. Within 'Endpoints', see the value for the 'Primary Endpoint'.
-    key_vault_uri : str
-        The URI of the Azure Key Vault account.
-    key_vault_account_key : str
-        The key vault secret name which contains the account key of the Azure storage account.
+    storage_account : str
+        The name of the storage account.
    source_file_system : str, default="power-bi-backup"
        The container in which the source backup file is located.
    target_file_system : str, default="power-bi-backup"
        The container in which the target backup file will be saved.
-    """
-
-    from notebookutils import mssparkutils
-    from azure.storage.filedatalake import DataLakeServiceClient
-
-    account_key = mssparkutils.credentials.getSecret(
-        key_vault_uri, key_vault_account_key
-    )
+    """

    suffix = ".abf"

@@ -209,14 +208,13 @@

    source_path = f"/{source_workspace}/{source_file_name}"
    target_path = f"/{target_workspace}/{target_file_name}"
-    service_client = DataLakeServiceClient(
-        account_url=storage_account_url, credential=account_key
-    )

-    source_file_system_client = service_client.get_file_system_client(
+    client = get_adls_client(account_name=storage_account)
+
+    source_file_system_client = client.get_file_system_client(
        file_system=source_file_system
    )
-    destination_file_system_client = service_client.get_file_system_client(
+    destination_file_system_client = client.get_file_system_client(
        file_system=target_file_system
    )

@@ -238,3 +236,98 @@
    print(
        f"{icons.green_dot} The backup file of the '{source_file_name}' semantic model from the '{source_workspace}' workspace has been copied as the '{target_file_name}' semantic model backup file within the '{target_workspace}' workspace."
    )
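Aside (not part of the diff): a usage sketch for the simplified signature above. All workspace, file, and account names are placeholders; it assumes the storage account is connected to both workspaces and the caller holds the 'Storage Blob Data Contributor' role, per the docstring steps:

from sempy_labs import copy_semantic_model_backup_file

copy_semantic_model_backup_file(
    source_workspace="Sales Dev",       # hypothetical source workspace
    target_workspace="Sales Prod",      # hypothetical target workspace
    source_file_name="SalesModel.abf",
    target_file_name="SalesModel.abf",
    storage_account="mystorageacct",    # placeholder ADLS Gen2 account name
)  # both containers default to 'power-bi-backup'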


@log
def list_backups(workspace: Optional[str] = None) -> pd.DataFrame:
"""
Shows a list of backup files contained within a workspace's ADLS Gen2 storage account.
Requirement: An ADLS Gen2 storage account must be `connected to the workspace <https://learn.microsoft.com/power-bi/transform-model/dataflows/dataflows-azure-data-lake-storage-integration#connect-to-an-azure-data-lake-gen-2-at-a-workspace-level>`_.
Parameters
----------
workspace : str, default=None
The Fabric workspace name.
Defaults to None which resolves to the workspace of the attached lakehouse
or if no lakehouse attached, resolves to the workspace of the notebook.
Returns
-------
pandas.DataFrame
A pandas dataframe showing a list of backup files contained within a workspace's ADLS Gen2 storage account.
"""

    client = fabric.PowerBIRestClient()
    workspace = fabric.resolve_workspace_name(workspace)
    workspace_id = fabric.resolve_workspace_id(workspace)
    response = client.get(
        f"/v1.0/myorg/resources?resourceType=StorageAccount&folderObjectId={workspace_id}"
    )

    if response.status_code != 200:
        raise FabricHTTPException(response)

    v = response.json().get("value", [])
    if not v:
        raise ValueError(
            f"{icons.red_dot} A storage account is not associated with the '{workspace}' workspace."
        )
    storage_account = v[0]["resourceName"]

    df = list_storage_account_files(storage_account=storage_account)
    # Prepend the storage account name so each row identifies its source account
    df.insert(0, "Storage Account Name", storage_account)

    return df
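Aside (not part of the diff): a quick sketch of calling list_backups from a notebook whose workspace has an ADLS Gen2 account connected; the column names follow the dataframe built by list_storage_account_files below:

from sempy_labs import list_backups

backups = list_backups()  # defaults to the notebook's workspace
print(backups[["Storage Account Name", "File Path", "File Size"]].head())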


@log
def list_storage_account_files(
    storage_account: str, container: Optional[str] = "power-bi-backup"
) -> pd.DataFrame:
    """
    Shows a list of files within an ADLS Gen2 storage account.

    Parameters
    ----------
    storage_account : str
        The name of the ADLS Gen2 storage account.
    container : str, default='power-bi-backup'
        The name of the container.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing a list of files contained within an ADLS Gen2 storage account.
    """

    df = pd.DataFrame(
        columns=[
            "File Path",
            "File Size",
            "Creation Time",
            "Last Modified",
            "Expiry Time",
            "Encryption Scope",
        ]
    )

    onelake = get_adls_client(storage_account)
    fs = onelake.get_file_system_client(container)

    for x in list(fs.get_paths()):
        if not x.is_directory:
            new_data = {
                "File Path": x.name,
                "File Size": x.content_length,
                "Creation Time": x.creation_time,
                "Last Modified": x.last_modified,
                "Expiry Time": x.expiry_time,
                "Encryption Scope": x.encryption_scope,
            }

            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

    df["File Size"] = df["File Size"].astype(int)

    return df
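Aside (not part of the diff): list_storage_account_files can also be called directly; the account name below is a placeholder and the container falls back to 'power-bi-backup':

from sempy_labs import list_storage_account_files

files = list_storage_account_files(storage_account="mystorageacct")
print(files.sort_values("Last Modified", ascending=False).head())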
29 changes: 29 additions & 0 deletions src/sempy_labs/_helper_functions.py
@@ -12,6 +12,9 @@
import sempy_labs._icons as icons
from sempy.fabric.exceptions import FabricHTTPException
import urllib.parse
+from notebookutils import mssparkutils
+from azure.core.credentials import TokenCredential, AccessToken
+from azure.storage.filedatalake import DataLakeServiceClient


def create_abfss_path(
@@ -868,3 +871,29 @@ def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID:
    deployment_pipeline_id = dfP_filt["Deployment Pipeline Id"].iloc[0]

    return deployment_pipeline_id


class FabricTokenCredential(TokenCredential):

    def get_token(
        self,
        scopes: str,
        claims: Optional[str] = None,
        tenant_id: Optional[str] = None,
        enable_cae: Optional[bool] = False,
        **kwargs,
    ) -> AccessToken:
        # Acquire a token for the requested scope from the Fabric notebook runtime
        token = mssparkutils.credentials.getToken(scopes)
        # An expiry of 0 marks the token as immediately stale, so callers
        # request a fresh token rather than caching this one
        access_token = AccessToken(token, 0)

        return access_token


def get_adls_client(account_name: str) -> DataLakeServiceClient:
    account_url = f"https://{account_name}.dfs.core.windows.net"

    service_client = DataLakeServiceClient(
        account_url, credential=FabricTokenCredential()
    )

    return service_client
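Aside (not part of the diff): a sketch of how these two helpers compose, assuming execution inside a Fabric notebook where mssparkutils can mint storage tokens; the account name is a placeholder:

from sempy_labs._helper_functions import get_adls_client

# The DataLakeServiceClient authenticates through FabricTokenCredential,
# which requests a token from the notebook runtime for each storage call.
client = get_adls_client("mystorageacct")
fs = client.get_file_system_client("power-bi-backup")
for path in fs.get_paths():
    print(path.name, path.is_directory)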
