From b0a6e6b62a93fb22858d0ffd7ad506179dbbedb5 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 25 Sep 2024 16:45:40 +0200 Subject: [PATCH 01/12] cleaned up function description sections --- src/sempy_labs/_icons.py | 8 ++ src/sempy_labs/_list_functions.py | 14 ++-- src/sempy_labs/_model_auto_build.py | 4 - src/sempy_labs/_notebooks.py | 7 +- src/sempy_labs/_refresh_semantic_model.py | 13 +--- src/sempy_labs/_workspaces.py | 3 - src/sempy_labs/admin/_basic_functions.py | 92 ++++++++++++++++++++++- 7 files changed, 105 insertions(+), 36 deletions(-) diff --git a/src/sempy_labs/_icons.py b/src/sempy_labs/_icons.py index f7e7ab18..ac717ae9 100644 --- a/src/sempy_labs/_icons.py +++ b/src/sempy_labs/_icons.py @@ -93,3 +93,11 @@ "P4": "F512", "P5": "F1024", } +refreshTypes = [ + "full", + "automatic", + "dataOnly", + "calculate", + "clearValues", + "defragment", +] diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py index 59c2e294..2cf91119 100644 --- a/src/sempy_labs/_list_functions.py +++ b/src/sempy_labs/_list_functions.py @@ -8,7 +8,6 @@ lro, resolve_item_type, format_dax_object_name, - pagination, ) import pandas as pd from typing import Optional @@ -802,7 +801,7 @@ def list_sqlendpoints(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_mirroredwarehouses(workspace: Optional[str] = None) -> pd.DataFrame: +def list_mirrored_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the mirrored warehouses within a workspace. @@ -845,7 +844,7 @@ def list_mirroredwarehouses(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_kqldatabases(workspace: Optional[str] = None) -> pd.DataFrame: +def list_kql_databases(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the KQL databases within a workspace. @@ -901,7 +900,7 @@ def list_kqldatabases(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_kqlquerysets(workspace: Optional[str] = None) -> pd.DataFrame: +def list_kql_querysets(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the KQL Querysets within a workspace. @@ -942,7 +941,7 @@ def list_kqlquerysets(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_mlmodels(workspace: Optional[str] = None) -> pd.DataFrame: +def list_ml_models(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the ML models within a workspace. @@ -1066,7 +1065,7 @@ def list_datapipelines(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_mlexperiments(workspace: Optional[str] = None) -> pd.DataFrame: +def list_ml_experiments(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the ML experiments within a workspace. @@ -1688,9 +1687,6 @@ def list_capacities() -> pd.DataFrame: """ Shows the capacities and their properties. - Parameters - ---------- - Returns ------- pandas.DataFrame diff --git a/src/sempy_labs/_model_auto_build.py b/src/sempy_labs/_model_auto_build.py index 27c89698..c939cc80 100644 --- a/src/sempy_labs/_model_auto_build.py +++ b/src/sempy_labs/_model_auto_build.py @@ -34,10 +34,6 @@ def model_auto_build( The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ workspace = fabric.resolve_workspace_name(workspace) diff --git a/src/sempy_labs/_notebooks.py b/src/sempy_labs/_notebooks.py index 4ca15e83..2bc20841 100644 --- a/src/sempy_labs/_notebooks.py +++ b/src/sempy_labs/_notebooks.py @@ -14,7 +14,7 @@ def get_notebook_definition( notebook_name: str, workspace: Optional[str] = None, decode: Optional[bool] = True -): +) -> str: """ Obtains the notebook definition. @@ -32,7 +32,7 @@ def get_notebook_definition( Returns ------- - ipynb + str The notebook definition. """ @@ -90,9 +90,6 @@ def import_notebook_from_web( The name of the workspace. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- """ (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) diff --git a/src/sempy_labs/_refresh_semantic_model.py b/src/sempy_labs/_refresh_semantic_model.py index eccb683b..aa7be252 100644 --- a/src/sempy_labs/_refresh_semantic_model.py +++ b/src/sempy_labs/_refresh_semantic_model.py @@ -74,18 +74,9 @@ def extract_names(partition): refresh_type.lower().replace("only", "Only").replace("values", "Values") ) - refreshTypes = [ - "full", - "automatic", - "dataOnly", - "calculate", - "clearValues", - "defragment", - ] - - if refresh_type not in refreshTypes: + if refresh_type not in icons.refreshTypes: raise ValueError( - f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}." + f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {icons.refreshTypes}." ) if len(objects) == 0: diff --git a/src/sempy_labs/_workspaces.py b/src/sempy_labs/_workspaces.py index aa62ecf7..19e1edfd 100644 --- a/src/sempy_labs/_workspaces.py +++ b/src/sempy_labs/_workspaces.py @@ -22,9 +22,6 @@ def delete_user_from_workspace(email_address: str, workspace: Optional[str] = No The name of the workspace. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- """ (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) diff --git a/src/sempy_labs/admin/_basic_functions.py b/src/sempy_labs/admin/_basic_functions.py index d2c879fa..a7b9fe8f 100644 --- a/src/sempy_labs/admin/_basic_functions.py +++ b/src/sempy_labs/admin/_basic_functions.py @@ -3,7 +3,10 @@ from uuid import UUID import sempy_labs._icons as icons from sempy.fabric.exceptions import FabricHTTPException -from sempy_labs._helper_functions import resolve_workspace_name_and_id, pagination +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + pagination, +) import datetime import numpy as np import pandas as pd @@ -519,6 +522,16 @@ def scan_workspaces( def list_datasets() -> pd.DataFrame: + """ + Shows a list of datasets for the organization. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of datasets for the organization. 
+ """ + + # https://learn.microsoft.com/en-us/rest/api/power-bi/admin/datasets-get-datasets-as-admin df = pd.DataFrame( columns=[ @@ -544,8 +557,7 @@ def list_datasets() -> pd.DataFrame: ] ) - client = fabric.FabricRestClient() - + client = fabric.PowerBIRestClient() response = client.get("/v1.0/myorg/admin/datasets") if response.status_code != 200: @@ -600,6 +612,25 @@ def list_datasets() -> pd.DataFrame: def list_item_access_details( item_name: str, type: str, workspace: Optional[str] = None ) -> pd.DataFrame: + """ + Returns a list of users (including groups and service principals) and lists their workspace roles. + + Parameters + ---------- + item_name : str + Name of the Fabric item. + type : str + Type of Fabric item. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of users (including groups and service principals) and lists their workspace roles. + """ # https://learn.microsoft.com/en-us/rest/api/fabric/admin/items/list-item-access-details?tabs=HTTP @@ -652,6 +683,19 @@ def list_item_access_details( def list_access_entities( user_email_address: str, ) -> pd.DataFrame: + """ + Shows a list of permission details for Fabric and PowerBI items the specified user can access. + + Parameters + ---------- + user_email_address : str + The user's email address. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of permission details for Fabric and PowerBI items the specified user can access. + """ # https://learn.microsoft.com/en-us/rest/api/fabric/admin/users/list-access-entities?tabs=HTTP @@ -691,8 +735,24 @@ def list_access_entities( def list_workspace_access_details( workspace: Optional[Union[str, UUID]] = None ) -> pd.DataFrame: + + """ + Shows a list of users (including groups and Service Principals) that have access to the specified workspace. - # https://learn.microsoft.com/en-us/rest/api/fabric/admin/items/list-items?tabs=HTTP + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of users (including groups and Service Principals) that have access to the specified workspace. + """ + + # https://learn.microsoft.com/en-us/rest/api/fabric/admin/workspaces/list-workspace-access-details?tabs=HTTP workspace_name = fabric.resolve_workspace_name(workspace) workspace_id = fabric.resolve_workspace_id(workspace_name) @@ -733,6 +793,30 @@ def list_items( type: Optional[str] = None, ) -> pd.DataFrame: + """ + Shows a list of active Fabric and PowerBI items. + + Parameters + ---------- + capacity_name : str, default=None + The capacity name. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + state : str, default=None + The item state. + type : str, default=None + The item type. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of active Fabric and Power BI items. + """ + + # https://learn.microsoft.com/en-us/rest/api/fabric/admin/items/list-items?tabs=HTTP + url = "/v1/admin/items?" 
df = pd.DataFrame( From 484adfce404a994cd242b4b90b00be265a79be1e Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 26 Sep 2024 00:39:40 +0200 Subject: [PATCH 02/12] added duckdb for getting RunId, cleaned up parameters --- docs/requirements.txt | 3 +- pyproject.toml | 3 +- src/sempy_labs/_generate_semantic_model.py | 6 +-- src/sempy_labs/_git.py | 2 +- src/sempy_labs/_helper_functions.py | 6 +-- src/sempy_labs/_list_functions.py | 10 ++--- src/sempy_labs/_model_bpa.py | 25 ++++++----- src/sempy_labs/_model_bpa_bulk.py | 2 +- src/sempy_labs/_notebooks.py | 2 +- src/sempy_labs/_query_scale_out.py | 4 +- src/sempy_labs/_refresh_semantic_model.py | 13 +++--- src/sempy_labs/_spark.py | 8 ++-- src/sempy_labs/_vertipaq.py | 37 ++++++++-------- src/sempy_labs/admin/_basic_functions.py | 16 +++---- src/sempy_labs/admin/_domains.py | 2 +- .../directlake/_directlake_schema_sync.py | 2 +- src/sempy_labs/directlake/_dl_helper.py | 4 +- .../_update_directlake_partition_entity.py | 2 +- src/sempy_labs/directlake/_warm_cache.py | 2 +- .../lakehouse/_get_lakehouse_tables.py | 6 +-- src/sempy_labs/lakehouse/_lakehouse.py | 6 +-- src/sempy_labs/report/_generate_report.py | 2 +- src/sempy_labs/report/_report_bpa.py | 4 +- src/sempy_labs/tom/_model.py | 44 +++++++++---------- 24 files changed, 106 insertions(+), 105 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b0d6ad46..bf5422a9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -11,4 +11,5 @@ anytree IPython polib azure.mgmt.resource -jsonpath_ng \ No newline at end of file +jsonpath_ng +duckdb \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 804d532a..04dbb94b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "polib", "azure.mgmt.resource", "jsonpath_ng", + "duckdb", ] [tool.setuptools.packages.find] @@ -46,7 +47,7 @@ test = [ Repository = "https://github.com/microsoft/semantic-link-labs.git" [[tool.mypy.overrides]] -module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*" +module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*,duckdb.*" ignore_missing_imports = true [tool.flake8] diff --git a/src/sempy_labs/_generate_semantic_model.py b/src/sempy_labs/_generate_semantic_model.py index 915be014..ba0614e4 100644 --- a/src/sempy_labs/_generate_semantic_model.py +++ b/src/sempy_labs/_generate_semantic_model.py @@ -20,7 +20,7 @@ def create_blank_semantic_model( dataset: str, compatibility_level: int = 1605, workspace: Optional[str] = None, - overwrite: Optional[bool] = True, + overwrite: bool = True, ): """ Creates a new blank semantic model (no tables/columns etc.). @@ -212,8 +212,8 @@ def deploy_semantic_model( source_workspace: Optional[str] = None, target_dataset: Optional[str] = None, target_workspace: Optional[str] = None, - refresh_target_dataset: Optional[bool] = True, - overwrite: Optional[bool] = False, + refresh_target_dataset: bool = True, + overwrite: bool = False, ): """ Deploys a semantic model based on an existing semantic model. 
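The tightened signatures above (plain `bool` defaults instead of `Optional[bool]`) do not change how the functions are called. An illustrative sketch, assuming `deploy_semantic_model` is imported from the package top level; the model and workspace names are placeholders:

```python
import sempy_labs as labs

# Placeholder model and workspace names.
labs.deploy_semantic_model(
    source_dataset="Sales Model",
    source_workspace="Dev",
    target_dataset="Sales Model",
    target_workspace="Prod",
    refresh_target_dataset=True,   # plain bool default, previously Optional[bool]
    overwrite=False,
)
```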
diff --git a/src/sempy_labs/_git.py b/src/sempy_labs/_git.py index f20e8822..d58faf22 100644 --- a/src/sempy_labs/_git.py +++ b/src/sempy_labs/_git.py @@ -314,7 +314,7 @@ def update_from_git( remote_commit_hash: str, conflict_resolution_policy: str, workspace_head: Optional[str] = None, - allow_override: Optional[bool] = False, + allow_override: bool = False, workspace: Optional[str] = None, ): """ diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index a80dda1f..3f0dea9d 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -390,7 +390,7 @@ def save_as_delta_table( dataframe, delta_table_name: str, write_mode: str, - merge_schema: Optional[bool] = False, + merge_schema: bool = False, schema: Optional[dict] = None, lakehouse: Optional[str] = None, workspace: Optional[str] = None, @@ -869,7 +869,7 @@ def lro( response, status_codes: Optional[List[str]] = [200, 202], sleep_time: Optional[int] = 1, - return_status_code: Optional[bool] = False, + return_status_code: bool = False, ): if response.status_code not in status_codes: @@ -943,7 +943,7 @@ def get_token( scopes: str, claims: Optional[str] = None, tenant_id: Optional[str] = None, - enable_cae: Optional[bool] = False, + enable_cae: bool = False, **kwargs: any, ) -> AccessToken: diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py index 2cf91119..31d1e0d3 100644 --- a/src/sempy_labs/_list_functions.py +++ b/src/sempy_labs/_list_functions.py @@ -83,7 +83,7 @@ def get_object_level_security( def list_tables( - dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False + dataset: str, workspace: Optional[str] = None, extended: bool = False ) -> pd.DataFrame: """ Shows a semantic model's tables and their properties. @@ -1263,7 +1263,7 @@ def update_item( def list_relationships( - dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False + dataset: str, workspace: Optional[str] = None, extended: bool = False ) -> pd.DataFrame: """ Shows a semantic model's relationships and their properties. @@ -1774,7 +1774,7 @@ def list_reports_using_semantic_model( def list_report_semantic_model_objects( - dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False + dataset: str, workspace: Optional[str] = None, extended: bool = False ) -> pd.DataFrame: """ Shows a list of semantic model objects (i.e. columns, measures, hierarchies) used in all reports which feed data from @@ -1868,8 +1868,8 @@ def list_report_semantic_model_objects( def list_semantic_model_object_report_usage( dataset: str, workspace: Optional[str] = None, - include_dependencies: Optional[bool] = False, - extended: Optional[bool] = False, + include_dependencies: bool = False, + extended: bool = False, ) -> pd.DataFrame: """ Shows a list of semantic model objects and how many times they are referenced in all reports which rely on this semantic model. 
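As a quick illustration of the cleaned-up helper signatures, a hedged sketch of `save_as_delta_table` (assumes a lakehouse is attached to the notebook; the table name and sample data are placeholders):

```python
import pandas as pd
import sempy_labs as labs

df = pd.DataFrame({"RunId": [1], "Category": ["Performance"], "Severity": ["Warning"]})

# merge_schema is now a plain bool (default False); write_mode accepts 'append' or 'overwrite'.
labs.save_as_delta_table(
    dataframe=df,
    delta_table_name="modelbparesults",
    write_mode="append",
    merge_schema=False,
)
```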
diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index 4f75327d..1db199a7 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -3,7 +3,6 @@ import warnings import datetime from IPython.display import display, HTML -from pyspark.sql import SparkSession from sempy_labs._model_dependencies import get_model_calc_dependencies from sempy_labs._helper_functions import ( format_dax_object_name, @@ -14,7 +13,10 @@ resolve_dataset_id, get_language_codes, ) -from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached +from sempy_labs.lakehouse import ( + get_lakehouse_tables, + lakehouse_attached +) from sempy_labs.tom import connect_semantic_model from sempy_labs._model_bpa_rules import model_bpa_rules from typing import Optional @@ -23,6 +25,7 @@ from pyspark.sql.functions import col, flatten from pyspark.sql.types import StructType, StructField, StringType import os +import duckdb @log @@ -30,9 +33,9 @@ def run_model_bpa( dataset: str, rules: Optional[pd.DataFrame] = None, workspace: Optional[str] = None, - export: Optional[bool] = False, - return_dataframe: Optional[bool] = False, - extended: Optional[bool] = False, + export: bool = False, + return_dataframe: bool = False, + extended: bool = False, language: Optional[str] = None, **kwargs, ): @@ -151,6 +154,7 @@ def translate_using_po(rule_file): def translate_using_spark(rule_file): from synapse.ml.services import Translate + from pyspark.sql import SparkSession rules_temp = rule_file.copy() rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1) @@ -346,15 +350,14 @@ def translate_using_spark(rule_file): dfExport["Severity"].replace(icons.severity_mapping, inplace=True) - spark = SparkSession.builder.getOrCreate() - query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}" - if len(lakeT_filt) == 0: runId = 1 else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 + x = duckdb.sql( + f"""SELECT max(RunId) as max_run_id FROM delta_scan('/lakehouse/default/Tables/{delta_table_name}/') """ + ).fetchall() + max_run_id = x[0][0] + runId = max_run_id + 1 now = datetime.datetime.now() dfD = fabric.list_datasets(workspace=workspace, mode="rest") diff --git a/src/sempy_labs/_model_bpa_bulk.py b/src/sempy_labs/_model_bpa_bulk.py index cd420ad9..b2221da2 100644 --- a/src/sempy_labs/_model_bpa_bulk.py +++ b/src/sempy_labs/_model_bpa_bulk.py @@ -18,7 +18,7 @@ @log def run_model_bpa_bulk( rules: Optional[pd.DataFrame] = None, - extended: Optional[bool] = False, + extended: bool = False, language: Optional[str] = None, workspace: Optional[str | List[str]] = None, skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"], diff --git a/src/sempy_labs/_notebooks.py b/src/sempy_labs/_notebooks.py index 2bc20841..20e10595 100644 --- a/src/sempy_labs/_notebooks.py +++ b/src/sempy_labs/_notebooks.py @@ -13,7 +13,7 @@ def get_notebook_definition( - notebook_name: str, workspace: Optional[str] = None, decode: Optional[bool] = True + notebook_name: str, workspace: Optional[str] = None, decode: bool = True ) -> str: """ Obtains the notebook definition. 
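The RunId lookup above replaces a Spark SQL query with DuckDB's `delta_scan`, which reads the Delta table files directly and avoids starting a Spark session just to fetch one value. A minimal sketch of the pattern, assuming the `duckdb` package is available and a lakehouse is mounted at `/lakehouse/default` (the patch itself guards this with an existence check on the table before querying; the table name below is a placeholder):

```python
import duckdb

delta_table_name = "modelbparesults"  # placeholder; the real code derives this name

# Highest existing RunId, read straight from the Delta table in the attached lakehouse.
rows = duckdb.sql(
    f"SELECT max(RunId) AS max_run_id "
    f"FROM delta_scan('/lakehouse/default/Tables/{delta_table_name}/')"
).fetchall()

max_run_id = rows[0][0]
run_id = 1 if max_run_id is None else max_run_id + 1
```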
diff --git a/src/sempy_labs/_query_scale_out.py b/src/sempy_labs/_query_scale_out.py index e0f1a319..47154315 100644 --- a/src/sempy_labs/_query_scale_out.py +++ b/src/sempy_labs/_query_scale_out.py @@ -181,8 +181,8 @@ def disable_qso(dataset: str, workspace: Optional[str] = None) -> pd.DataFrame: def set_qso( dataset: str, - auto_sync: Optional[bool] = True, - max_read_only_replicas: Optional[int] = -1, + auto_sync: bool = True, + max_read_only_replicas: int = -1, workspace: Optional[str] = None, ) -> pd.DataFrame: """ diff --git a/src/sempy_labs/_refresh_semantic_model.py b/src/sempy_labs/_refresh_semantic_model.py index aa7be252..f80baf5f 100644 --- a/src/sempy_labs/_refresh_semantic_model.py +++ b/src/sempy_labs/_refresh_semantic_model.py @@ -13,10 +13,10 @@ def refresh_semantic_model( dataset: str, tables: Optional[Union[str, List[str]]] = None, partitions: Optional[Union[str, List[str]]] = None, - refresh_type: Optional[str] = None, - retry_count: Optional[int] = 0, - apply_refresh_policy: Optional[bool] = True, - max_parallelism: Optional[int] = 10, + refresh_type: str = "full", + retry_count: int = 0, + apply_refresh_policy: bool = True, + max_parallelism: int = 10, workspace: Optional[str] = None, ): """ @@ -30,7 +30,7 @@ def refresh_semantic_model( A string or a list of tables to refresh. partitions: str, List[str], default=None A string or a list of partitions to refresh. Partitions must be formatted as such: 'Table Name'[Partition Name]. - refresh_type : str, default='full' + refresh_type : str, default="full" The type of processing to perform. Types align with the TMSL refresh command types: full, clearValues, calculate, dataOnly, automatic, and defragment. The add type isn't supported. Defaults to "full". retry_count : int, default=0 Number of times the operation retries before failing. @@ -48,9 +48,6 @@ def refresh_semantic_model( workspace = fabric.resolve_workspace_name(workspace) - if refresh_type is None: - refresh_type = "full" - if isinstance(tables, str): tables = [tables] if isinstance(partitions, str): diff --git a/src/sempy_labs/_spark.py b/src/sempy_labs/_spark.py index 3b8efcbe..0fa2933b 100644 --- a/src/sempy_labs/_spark.py +++ b/src/sempy_labs/_spark.py @@ -91,9 +91,9 @@ def create_custom_pool( max_node_count: int, min_executors: int, max_executors: int, - node_family: Optional[str] = "MemoryOptimized", - auto_scale_enabled: Optional[bool] = True, - dynamic_executor_allocation_enabled: Optional[bool] = True, + node_family: str = "MemoryOptimized", + auto_scale_enabled: bool = True, + dynamic_executor_allocation_enabled: bool = True, workspace: Optional[str] = None, ): """ @@ -299,7 +299,7 @@ def delete_custom_pool(pool_name: str, workspace: Optional[str] = None): def get_spark_settings( - workspace: Optional[str] = None, return_dataframe: Optional[bool] = True + workspace: Optional[str] = None, return_dataframe: bool = True ) -> pd.DataFrame | dict: """ Shows the spark settings for a workspace. 
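With `refresh_type` now defaulting to "full" and validated against `icons.refreshTypes`, a call sketch (dataset, table, and workspace names are placeholders):

```python
import sempy_labs as labs

# refresh_type defaults to "full"; an unknown value now raises a ValueError listing
# the accepted types: full, automatic, dataOnly, calculate, clearValues, defragment.
labs.refresh_semantic_model(
    dataset="Sales Model",
    tables=["Internet Sales"],
    refresh_type="dataOnly",
    workspace="Dev",
)
```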
diff --git a/src/sempy_labs/_vertipaq.py b/src/sempy_labs/_vertipaq.py index 63abb3da..245cec78 100644 --- a/src/sempy_labs/_vertipaq.py +++ b/src/sempy_labs/_vertipaq.py @@ -20,6 +20,7 @@ from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons +import duckdb @log @@ -27,7 +28,7 @@ def vertipaq_analyzer( dataset: str, workspace: Optional[str] = None, export: Optional[str] = None, - read_stats_from_data: Optional[bool] = False, + read_stats_from_data: bool = False, **kwargs, ): """ @@ -336,10 +337,10 @@ def vertipaq_analyzer( int_cols.append(k) elif v in ["float", "double"] and k != "Temperature": pct_cols.append(k) - colSize[int_cols] = colSize[int_cols].applymap("{:,}".format) - temp[int_cols] = temp[int_cols].applymap("{:,}".format) - colSize[pct_cols] = colSize[pct_cols].applymap("{:.2f}%".format) - temp[pct_cols] = temp[pct_cols].applymap("{:.2f}%".format) + colSize[int_cols] = colSize[int_cols].map("{:,}".format) + temp[int_cols] = temp[int_cols].map("{:,}".format) + colSize[pct_cols] = colSize[pct_cols].map("{:.2f}%".format) + temp[pct_cols] = temp[pct_cols].map("{:.2f}%".format) # Tables int_cols = [] @@ -351,8 +352,8 @@ def vertipaq_analyzer( pct_cols.append(k) export_Table = dfT.copy() - dfT[int_cols] = dfT[int_cols].applymap("{:,}".format) - dfT[pct_cols] = dfT[pct_cols].applymap("{:.2f}%".format) + dfT[int_cols] = dfT[int_cols].map("{:,}".format) + dfT[pct_cols] = dfT[pct_cols].map("{:.2f}%".format) # Relationships dfR = pd.merge( @@ -391,7 +392,7 @@ def vertipaq_analyzer( int_cols.append(k) if not read_stats_from_data: int_cols.remove("Missing Rows") - dfR[int_cols] = dfR[int_cols].applymap("{:,}".format) + dfR[int_cols] = dfR[int_cols].map("{:,}".format) # Partitions dfP = dfP[ @@ -414,7 +415,7 @@ def vertipaq_analyzer( if v in ["int", "long", "double", "float"]: int_cols.append(k) intList = ["Record Count", "Segment Count", "Records per Segment"] - dfP[intList] = dfP[intList].applymap("{:,}".format) + dfP[intList] = dfP[intList].map("{:,}".format) # Hierarchies dfH_filt = dfH[dfH["Level Ordinal"] == 0] @@ -426,7 +427,7 @@ def vertipaq_analyzer( dfH_filt["Used Size"] = dfH_filt["Used Size"].astype(int) export_Hier = dfH_filt.copy() intList = ["Used Size"] - dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format) + dfH_filt[intList] = dfH_filt[intList].map("{:,}".format) # Model # Converting to KB/MB/GB necessitates division by 1024 * 1000. 
@@ -456,7 +457,7 @@ def vertipaq_analyzer( for k, v in vertipaq_map["Model"].items(): if v in ["long", "int"] and k != "Compatibility Level": int_cols.append(k) - dfModel[int_cols] = dfModel[int_cols].applymap("{:,}".format) + dfModel[int_cols] = dfModel[int_cols].map("{:,}".format) dataFrames = { "dfModel": dfModel, @@ -483,26 +484,26 @@ def vertipaq_analyzer( ) if export == "table": - spark = SparkSession.builder.getOrCreate() + # spark = SparkSession.builder.getOrCreate() lakehouse_id = fabric.get_lakehouse_id() lake_workspace = fabric.resolve_workspace_name() lakehouse = resolve_lakehouse_name( lakehouse_id=lakehouse_id, workspace=lake_workspace ) - lakeTName = "vertipaq_analyzer_model" + lakeTName = "vertipaqanalyzer_model" lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace) lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName] - query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" - if len(lakeT_filt) == 0: runId = 1 else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 + x = duckdb.sql( + f"""SELECT max(RunId) as max_run_id FROM delta_scan('/lakehouse/default/Tables/{lakeTName}/') """ + ).fetchall() + max_run_id = x[0][0] + runId = max_run_id + 1 dfMap = { "Columns": ["Columns", export_Col], diff --git a/src/sempy_labs/admin/_basic_functions.py b/src/sempy_labs/admin/_basic_functions.py index a7b9fe8f..8c5c9a3a 100644 --- a/src/sempy_labs/admin/_basic_functions.py +++ b/src/sempy_labs/admin/_basic_functions.py @@ -390,7 +390,7 @@ def revoke_external_data_share( def list_capacities_delegated_tenant_settings( - return_dataframe: Optional[bool] = True, + return_dataframe: bool = True, ) -> Optional[pd.DataFrame | dict]: """ Returns list of tenant setting overrides that override at the capacities. @@ -480,11 +480,11 @@ def list_capacities_delegated_tenant_settings( def scan_workspaces( - data_source_details: Optional[bool] = False, - dataset_schema: Optional[bool] = False, - dataset_expressions: Optional[bool] = False, - lineage: Optional[bool] = False, - artifact_users: Optional[bool] = False, + data_source_details: bool = False, + dataset_schema: bool = False, + dataset_expressions: bool = False, + lineage: bool = False, + artifact_users: bool = False, workspace: Optional[str | List[str]] = None, ) -> dict: @@ -735,7 +735,6 @@ def list_access_entities( def list_workspace_access_details( workspace: Optional[Union[str, UUID]] = None ) -> pd.DataFrame: - """ Shows a list of users (including groups and Service Principals) that have access to the specified workspace. @@ -792,7 +791,6 @@ def list_items( state: Optional[str] = None, type: Optional[str] = None, ) -> pd.DataFrame: - """ Shows a list of active Fabric and PowerBI items. @@ -816,7 +814,7 @@ def list_items( """ # https://learn.microsoft.com/en-us/rest/api/fabric/admin/items/list-items?tabs=HTTP - + url = "/v1/admin/items?" df = pd.DataFrame( diff --git a/src/sempy_labs/admin/_domains.py b/src/sempy_labs/admin/_domains.py index c055f522..83480c05 100644 --- a/src/sempy_labs/admin/_domains.py +++ b/src/sempy_labs/admin/_domains.py @@ -30,7 +30,7 @@ def resolve_domain_id(domain_name: str) -> UUID: return dfL_filt["Domain ID"].iloc[0] -def list_domains(non_empty_only: Optional[bool] = False) -> pd.DataFrame: +def list_domains(non_empty_only: bool = False) -> pd.DataFrame: """ Shows a list of domains. 
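The `applymap` to `map` replacements in the Vertipaq formatting code above track pandas deprecating `DataFrame.applymap` in favour of `DataFrame.map` (available since pandas 2.1); the element-wise behaviour is unchanged. A small self-contained illustration with placeholder columns:

```python
import pandas as pd

df = pd.DataFrame({"Row Count": [1234567], "Pct of Total": [12.3456]})

# DataFrame.map applies the formatter element-wise, exactly as applymap did.
df[["Row Count"]] = df[["Row Count"]].map("{:,}".format)
df[["Pct of Total"]] = df[["Pct of Total"]].map("{:.2f}%".format)
print(df)
```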
diff --git a/src/sempy_labs/directlake/_directlake_schema_sync.py b/src/sempy_labs/directlake/_directlake_schema_sync.py index 4b4b898b..043d21c9 100644 --- a/src/sempy_labs/directlake/_directlake_schema_sync.py +++ b/src/sempy_labs/directlake/_directlake_schema_sync.py @@ -12,7 +12,7 @@ def direct_lake_schema_sync( dataset: str, workspace: Optional[str] = None, - add_to_model: Optional[bool] = False, + add_to_model: bool = False, **kwargs, ): """ diff --git a/src/sempy_labs/directlake/_dl_helper.py b/src/sempy_labs/directlake/_dl_helper.py index 9436fbb5..7a30837e 100644 --- a/src/sempy_labs/directlake/_dl_helper.py +++ b/src/sempy_labs/directlake/_dl_helper.py @@ -73,8 +73,8 @@ def generate_direct_lake_semantic_model( workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None, - overwrite: Optional[bool] = False, - refresh: Optional[bool] = True, + overwrite: bool = False, + refresh: bool = True, ): """ Dynamically generates a Direct Lake semantic model based on tables in a Fabric lakehouse. diff --git a/src/sempy_labs/directlake/_update_directlake_partition_entity.py b/src/sempy_labs/directlake/_update_directlake_partition_entity.py index bc72e852..81bcb14e 100644 --- a/src/sempy_labs/directlake/_update_directlake_partition_entity.py +++ b/src/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -100,7 +100,7 @@ def add_table_to_direct_lake_semantic_model( dataset: str, table_name: str, lakehouse_table_name: str, - refresh: Optional[bool] = True, + refresh: bool = True, workspace: Optional[str] = None, ): """ diff --git a/src/sempy_labs/directlake/_warm_cache.py b/src/sempy_labs/directlake/_warm_cache.py index a9914214..be800ecc 100644 --- a/src/sempy_labs/directlake/_warm_cache.py +++ b/src/sempy_labs/directlake/_warm_cache.py @@ -15,7 +15,7 @@ def warm_direct_lake_cache_perspective( dataset: str, perspective: str, - add_dependencies: Optional[bool] = False, + add_dependencies: bool = False, workspace: Optional[str] = None, ) -> pd.DataFrame: """ diff --git a/src/sempy_labs/lakehouse/_get_lakehouse_tables.py b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py index e78f5981..a555c9d8 100644 --- a/src/sempy_labs/lakehouse/_get_lakehouse_tables.py +++ b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -24,9 +24,9 @@ def get_lakehouse_tables( lakehouse: Optional[str] = None, workspace: Optional[str] = None, - extended: Optional[bool] = False, - count_rows: Optional[bool] = False, - export: Optional[bool] = False, + extended: bool = False, + count_rows: bool = False, + export: bool = False, ) -> pd.DataFrame: """ Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. diff --git a/src/sempy_labs/lakehouse/_lakehouse.py b/src/sempy_labs/lakehouse/_lakehouse.py index 61b1b1f7..5dc9bb93 100644 --- a/src/sempy_labs/lakehouse/_lakehouse.py +++ b/src/sempy_labs/lakehouse/_lakehouse.py @@ -1,9 +1,7 @@ import sempy.fabric as fabric from tqdm.auto import tqdm -from pyspark.sql import SparkSession from sempy_labs._helper_functions import resolve_lakehouse_name from typing import List, Optional, Union -import sempy_labs._icons as icons from sempy._utils._log import log @@ -16,7 +14,7 @@ def lakehouse_attached() -> bool: bool Returns True if a lakehouse is attached to the notebook. 
""" - + from pyspark.sql import SparkSession spark = SparkSession.builder.getOrCreate() lakeId = spark.conf.get("trident.lakehouse.id") @@ -49,6 +47,7 @@ def optimize_lakehouse_tables( or if no lakehouse attached, resolves to the workspace of the notebook. """ + from pyspark.sql import SparkSession from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables from delta import DeltaTable @@ -107,6 +106,7 @@ def vacuum_lakehouse_tables( The default retention period is 168 hours (7 days) unless manually configured via table properties. """ + from pyspark.sql import SparkSession from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables from delta import DeltaTable diff --git a/src/sempy_labs/report/_generate_report.py b/src/sempy_labs/report/_generate_report.py index 5d0b229b..f2ef47ed 100644 --- a/src/sempy_labs/report/_generate_report.py +++ b/src/sempy_labs/report/_generate_report.py @@ -313,7 +313,7 @@ def _create_report( dataset: str, dataset_workspace: Optional[str] = None, report_workspace: Optional[str] = None, - update_if_exists: Optional[bool] = False, + update_if_exists: bool = False, ): from sempy_labs.report import report_rebind diff --git a/src/sempy_labs/report/_report_bpa.py b/src/sempy_labs/report/_report_bpa.py index ac096ce5..148e7d35 100644 --- a/src/sempy_labs/report/_report_bpa.py +++ b/src/sempy_labs/report/_report_bpa.py @@ -22,8 +22,8 @@ def run_report_bpa( rules: Optional[pd.DataFrame] = None, workspace: Optional[str] = None, # language: Optional[str] = None, - export: Optional[bool] = False, - return_dataframe: Optional[bool] = False, + export: bool = False, + return_dataframe: bool = False, ): """ Displays an HTML visualization of the results of the Best Practice Analyzer scan for a report. diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py index e95a874c..15b97e15 100644 --- a/src/sempy_labs/tom/_model.py +++ b/src/sempy_labs/tom/_model.py @@ -226,7 +226,7 @@ def add_measure( measure_name: str, expression: str, format_string: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, description: Optional[str] = None, display_folder: Optional[str] = None, format_string_expression: Optional[str] = None, @@ -301,11 +301,11 @@ def add_calculated_table_column( source_column: str, data_type: str, format_string: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, - key: Optional[bool] = False, + key: bool = False, summarize_by: Optional[str] = None, lineage_tag: Optional[str] = None, source_lineage_tag: Optional[str] = None, @@ -387,11 +387,11 @@ def add_data_column( source_column: str, data_type: str, format_string: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, - key: Optional[bool] = False, + key: bool = False, summarize_by: Optional[str] = None, lineage_tag: Optional[str] = None, source_lineage_tag: Optional[str] = None, @@ -473,11 +473,11 @@ def add_calculated_column( expression: str, data_type: str, format_string: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, - key: Optional[bool] = False, + key: bool = False, summarize_by: Optional[str] = None, lineage_tag: Optional[str] = 
None, source_lineage_tag: Optional[str] = None, @@ -708,7 +708,7 @@ def add_hierarchy( columns: List[str], levels: Optional[List[str]] = None, hierarchy_description: Optional[str] = None, - hierarchy_hidden: Optional[bool] = False, + hierarchy_hidden: bool = False, lineage_tag: Optional[str] = None, source_lineage_tag: Optional[str] = None, ): @@ -781,9 +781,9 @@ def add_relationship( from_cardinality: str, to_cardinality: str, cross_filtering_behavior: Optional[str] = None, - is_active: Optional[bool] = True, + is_active: bool = True, security_filtering_behavior: Optional[str] = None, - rely_on_referential_integrity: Optional[bool] = False, + rely_on_referential_integrity: bool = False, ): """ Adds a `relationship `_ to a semantic model. @@ -855,7 +855,7 @@ def add_calculation_group( name: str, precedence: int, description: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, ): """ Adds a `calculation group `_ to a semantic model. @@ -2483,7 +2483,7 @@ def set_aggregations(self, table_name: str, agg_table_name: str): ) def set_is_available_in_mdx( - self, table_name: str, column_name: str, value: Optional[bool] = False + self, table_name: str, column_name: str, value: bool = False ): """ Sets the `IsAvailableInMDX `_ property on a column. @@ -2586,7 +2586,7 @@ def add_table( name: str, description: Optional[str] = None, data_category: Optional[str] = None, - hidden: Optional[bool] = False, + hidden: bool = False, lineage_tag: Optional[str] = None, source_lineage_tag: Optional[str] = None, ): @@ -2629,9 +2629,9 @@ def add_calculated_table( expression: str, description: Optional[str] = None, data_category: Optional[str] = None, - hidden: Optional[bool] = False, - lineage_tag: Optional[bool] = None, - source_lineage_tag: Optional[bool] = None, + hidden: bool = False, + lineage_tag: Optional[str] = None, + source_lineage_tag: Optional[str] = None, ): """ Adds a calculated table to the semantic model. 
@@ -3378,7 +3378,7 @@ def update_incremental_refresh_policy( incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, - only_refresh_complete_days: Optional[bool] = False, + only_refresh_complete_days: bool = False, detect_data_changes_column: Optional[str] = None, ): """ @@ -3483,7 +3483,7 @@ def add_incremental_refresh_policy( incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, - only_refresh_complete_days: Optional[bool] = False, + only_refresh_complete_days: bool = False, detect_data_changes_column: Optional[str] = None, ): """ @@ -3652,7 +3652,7 @@ def apply_refresh_policy( self, table_name: str, effective_date: Optional[datetime] = None, - refresh: Optional[bool] = True, + refresh: bool = True, max_parallelism: Optional[int] = 0, ): """ @@ -3905,7 +3905,7 @@ def update_measure( measure_name: str, expression: Optional[str] = None, format_string: Optional[str] = None, - hidden: Optional[bool] = None, + hidden: bool = None, description: Optional[str] = None, display_folder: Optional[str] = None, format_string_expression: Optional[str] = None, @@ -3964,11 +3964,11 @@ def update_column( data_type: Optional[str] = None, expression: Optional[str] = None, format_string: Optional[str] = None, - hidden: Optional[bool] = None, + hidden: bool = None, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, - key: Optional[bool] = None, + key: bool = None, summarize_by: Optional[str] = None, ): """ From bae7fb759589d1c5f8c192f49cb1e7a33015bc11 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 26 Sep 2024 10:27:35 +0200 Subject: [PATCH 03/12] fixed git issue 172 --- README.md | 2 +- src/sempy_labs/_git.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cf669fcf..5ca4c84c 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ An even better way to ensure the semantic-link-labs library is available in your 2. 
Select your newly created environment within the 'Environment' drop down in the navigation bar at the top of the notebook ## Version History -* [0.8.0](https://github.com/microsoft/semantic-link-labs/releases/tag/0.8.0) (September 24, 2024) +* [0.8.0](https://github.com/microsoft/semantic-link-labs/releases/tag/0.8.0) (September 25, 2024) * [0.7.4](https://github.com/microsoft/semantic-link-labs/releases/tag/0.7.4) (September 16, 2024) * [0.7.3](https://github.com/microsoft/semantic-link-labs/releases/tag/0.7.3) (September 11, 2024) * [0.7.2](https://github.com/microsoft/semantic-link-labs/releases/tag/0.7.2) (August 30, 2024) diff --git a/src/sempy_labs/_git.py b/src/sempy_labs/_git.py index d58faf22..19f3d6a3 100644 --- a/src/sempy_labs/_git.py +++ b/src/sempy_labs/_git.py @@ -341,9 +341,9 @@ def update_from_git( workspace, workspace_id = resolve_workspace_name_and_id(workspace) conflict_resolution_policies = ["PreferWorkspace", "PreferRemote"] - if "remote" in conflict_resolution_policies.lower(): + if "remote" in [policy.lower() for policy in conflict_resolution_policies]: conflict_resolution_policies = "PreferRemote" - elif "workspace" in conflict_resolution_policies.lower(): + elif "workspace" in [policy.lower() for policy in conflict_resolution_policies]: conflict_resolution_policies = "PreferWorkspace" if conflict_resolution_policy not in conflict_resolution_policies: From 0fb272d65cf5588a064523be1068a46d0700a682 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 27 Sep 2024 00:42:32 +0200 Subject: [PATCH 04/12] added api functions --- src/sempy_labs/__init__.py | 12 ++- src/sempy_labs/_kql_databases.py | 137 +++++++++++++++++++++++++ src/sempy_labs/_list_functions.py | 99 ------------------ src/sempy_labs/_mirrored_warehouses.py | 50 +++++++++ 4 files changed, 197 insertions(+), 101 deletions(-) create mode 100644 src/sempy_labs/_kql_databases.py create mode 100644 src/sempy_labs/_mirrored_warehouses.py diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py index d5a8ddca..59541dd8 100644 --- a/src/sempy_labs/__init__.py +++ b/src/sempy_labs/__init__.py @@ -1,3 +1,9 @@ +from sempy_labs._kql_databases import ( + list_kql_databases, + create_kql_database, + delete_kql_database, +) +from sempy_labs._mirrored_warehouses import list_mirrored_warehouses from sempy_labs._environments import ( create_environment, delete_environment, @@ -114,10 +120,8 @@ # list_datapipelines, # list_eventstreams, # list_kpis, - # list_kqldatabases, # list_kqlquerysets, list_lakehouses, - # list_mirroredwarehouses, # list_mlexperiments, # list_mlmodels, # list_relationships, @@ -340,4 +344,8 @@ "check_fabric_capacity_name_availablility", "delete_embedded_capacity", "delete_premium_capacity", + "list_mirrored_warehouses", + "list_kql_databases", + "create_kql_database", + "delete_kql_database", ] diff --git a/src/sempy_labs/_kql_databases.py b/src/sempy_labs/_kql_databases.py new file mode 100644 index 00000000..3f5c74bb --- /dev/null +++ b/src/sempy_labs/_kql_databases.py @@ -0,0 +1,137 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_kql_databases(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the KQL databases within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. 
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the KQL databases within a workspace. + """ + + df = pd.DataFrame( + columns=[ + "KQL Database Name", + "KQL Database Id", + "Description", + "Parent Eventhouse Item Id", + "Query Service URI", + "Ingestion Service URI", + "Database Type", + ] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/kqlDatabases") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + prop = v.get("properties", {}) + + new_data = { + "KQL Database Name": v.get("displayName"), + "KQL Database Id": v.get("id"), + "Description": v.get("description"), + "Parent Eventhouse Item Id": prop.get("parentEventhouseItemId"), + "Query Service URI": prop.get("queryServiceUri"), + "Ingestion Service URI": prop.get("ingestionServiceUri"), + "Database Type": prop.get("databaseType"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_kql_database( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a KQL database. + + Parameters + ---------- + name: str + Name of the KQL database. + description : str, default=None + A description of the environment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/environments", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' KQL database has been created within the '{workspace}' workspace." + ) + + +def delete_kql_database(name: str, workspace: Optional[str] = None): + """ + Deletes a KQL database. + + Parameters + ---------- + name: str + Name of the KQL database. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + dfK = list_kql_databases(workspace=workspace) + dfK_filt = dfK[dfK['KQL Database Name'] == name] + + if len(dfK_filt) == 0: + raise ValueError(f"{icons.red_dot} The '{name}' KQL database does not exist within the '{workspace}' workspace.") + kql_database_id = dfK_filt['KQL Database Id'].iloc[0] + + client = fabric.FabricRestClient() + response = client.delete( + f"/v1/workspaces/{workspace_id}/kqlDatabases/{kql_database_id}" + ) + + if response.status_code != 200: + raise FabricHTTPException(response) + print( + f"{icons.green_dot} The '{name}' KQL database within the '{workspace}' workspace has been deleted." 
+ ) diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py index 31d1e0d3..183a0d96 100644 --- a/src/sempy_labs/_list_functions.py +++ b/src/sempy_labs/_list_functions.py @@ -801,105 +801,6 @@ def list_sqlendpoints(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_mirrored_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the mirrored warehouses within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the mirrored warehouses within a workspace. - """ - - df = pd.DataFrame( - columns=["Mirrored Warehouse", "Mirrored Warehouse ID", "Description"] - ) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/mirroredWarehouses") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - - new_data = { - "Mirrored Warehouse": v.get("displayName"), - "Mirrored Warehouse ID": v.get("id"), - "Description": v.get("description"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_kql_databases(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the KQL databases within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the KQL Databases within a workspace. - """ - - df = pd.DataFrame( - columns=[ - "KQL Database Name", - "KQL Database ID", - "Description", - "Parent Eventhouse Item ID", - "Query Service URI", - "Ingestion Service URI", - "Kusto Database Type", - ] - ) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/kqlDatabases") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - prop = v.get("properties", {}) - - new_data = { - "KQL Database Name": v.get("displayName"), - "KQL Database ID": v.get("id"), - "Description": v.get("description"), - "Parent Eventhouse Item ID": prop.get("parentEventhouseItemId"), - "Query Service URI": prop.get("queryServiceUri"), - "Ingestion Service URI": prop.get("ingestionServiceUri"), - "Kusto Database Type": prop.get("kustoDatabaseType"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - def list_kql_querysets(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the KQL Querysets within a workspace. 
diff --git a/src/sempy_labs/_mirrored_warehouses.py b/src/sempy_labs/_mirrored_warehouses.py new file mode 100644 index 00000000..f85277f7 --- /dev/null +++ b/src/sempy_labs/_mirrored_warehouses.py @@ -0,0 +1,50 @@ +import sempy.fabric as fabric +import pandas as pd +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_mirrored_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the mirrored warehouses within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the mirrored warehouses within a workspace. + """ + + df = pd.DataFrame( + columns=["Mirrored Warehouse Name", "Mirrored Warehouse Id", "Description"] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/mirroredWarehouses") + if response.status_code != 200: + raise FabricHTTPException(response) + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + + new_data = { + "Mirrored Warehouse Name": v.get("displayName"), + "Mirrored Warehouse Id": v.get("id"), + "Description": v.get("description"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df From 0dd8fc80add9a8117020c98e430b061dffecf879 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 27 Sep 2024 14:39:26 +0200 Subject: [PATCH 05/12] added additional api functions, updated readme --- README.md | 26 +- notebooks/Tabular Object Model.ipynb | 2 +- src/sempy_labs/__init__.py | 75 +++++- src/sempy_labs/_ai.py | 2 - src/sempy_labs/_data_pipelines.py | 118 +++++++++ src/sempy_labs/_documentation.py | 118 +++++++++ src/sempy_labs/_eventhouses.py | 118 +++++++++ src/sempy_labs/_eventstreams.py | 118 +++++++++ src/sempy_labs/_helper_functions.py | 15 ++ src/sempy_labs/_icons.py | 12 + src/sempy_labs/_kql_databases.py | 11 +- src/sempy_labs/_kql_querysets.py | 124 ++++++++++ src/sempy_labs/_list_functions.py | 324 +------------------------ src/sempy_labs/_ml_experiments.py | 122 ++++++++++ src/sempy_labs/_ml_models.py | 120 +++++++++ src/sempy_labs/_model_bpa.py | 5 +- src/sempy_labs/_notebooks.py | 12 +- src/sempy_labs/_warehouses.py | 132 ++++++++++ src/sempy_labs/lakehouse/_lakehouse.py | 1 + src/sempy_labs/tom/_model.py | 22 ++ 20 files changed, 1121 insertions(+), 356 deletions(-) create mode 100644 src/sempy_labs/_data_pipelines.py create mode 100644 src/sempy_labs/_documentation.py create mode 100644 src/sempy_labs/_eventhouses.py create mode 100644 src/sempy_labs/_eventstreams.py create mode 100644 src/sempy_labs/_kql_querysets.py create mode 100644 src/sempy_labs/_ml_experiments.py create mode 100644 src/sempy_labs/_ml_models.py create mode 100644 src/sempy_labs/_warehouses.py diff --git a/README.md b/README.md index 5ca4c84c..e5f92baa 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,29 @@ [Read the documentation on ReadTheDocs!](https://semantic-link-labs.readthedocs.io/en/stable/) --- -This is a python library intended to be used in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). 
This library was originally intended to solely contain functions used for [migrating semantic models to Direct Lake mode](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration). However, it quickly became apparent that functions within such a library could support many other useful activities in the realm of semantic models, reports, lakehouses and really anything Fabric-related. As such, this library contains a variety of functions ranging from running [Vertipaq Analyzer](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.import_vertipaq_analyzer) or the [Best Practice Analyzer](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.run_model_bpa) against a semantic model to seeing if any [lakehouse tables hit Direct Lake guardrails](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.get_lakehouse_tables) or accessing the [Tabular Object Model](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html) and more! - -Instructions for migrating import/DirectQuery semantic models to Direct Lake mode can be found [here](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration). +Semantic Link Labs is a Python library designed for use in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). This library extends the capabilities of [Semantic Link](https://learn.microsoft.com/fabric/data-science/semantic-link-overview) offering additional functionalities to seamlessly integrate and work alongside it. The goal of Semantic Link Labs is to simplify technical processes, empowering people to focus on higher level activities and allowing tasks that are better suited for machines to be efficiently handled without human intervention. 
+ +## Featured Scenarios +* Semantic Models + * [Migrating an import/DirectQuery semantic model to Direct Lake](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration) + * [Model Best Practice Analyzer (BPA)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.run_model_bpa) + * [Vertipaq Analyzer](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.vertipaq_analyzer) + * [Tabular Object Model (TOM)](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Tabular%20Object%20Model.ipynb) + * [Translate a semantic model's metadata](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.translate_semantic_model) + * [Check Direct Lake Guardrails](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.get_lakehouse_tables) + * [Refresh](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Semantic%20Model%20Refresh.ipynb), [clear cache](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.clear_cache), [backup](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.backup_semantic_model), [restore](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.restore_semantic_model), [copy backup files](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.copy_semantic_model_backup_file), [move/deploy across workspaces](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.deploy_semantic_model) + * [Run DAX queries which impersonate a user](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.evaluate_dax_impersonation) +* Reports + * [Report Best Practice Analyzer (BPA)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.run_report_bpa) + * [View report metadata](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Report%20Analysis.ipynb) + * [Rebind reports](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.report_rebind) +* Capacities + * [Migrating a Power BI Premium capacity (P sku) to a Fabric capacity (F sku)](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Capacity%20Migration.ipynb) +* APIs + * Wrapper functions for [Power BI](https://learn.microsoft.com/rest/api/power-bi/), [Fabric](https://learn.microsoft.com/rest/api/fabric/articles/using-fabric-apis), and [Azure](https://learn.microsoft.com/rest/api/azure/?view=rest-power-bi-embedded-2021-01-01) APIs + + +### Make sure you check out the starter [notebooks](https://github.com/microsoft/semantic-link-labs/tree/main/notebooks) for getting started! If you encounter any issues, please [raise a bug](https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=). 
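To make the featured scenarios concrete, a short notebook-style sketch that strings a few of them together (dataset and workspace names are placeholders):

```python
import sempy_labs as labs

dataset = "Sales Model"   # placeholder semantic model name
workspace = "Analytics"   # placeholder workspace name

# Best Practice Analyzer and Vertipaq Analyzer against the model.
labs.run_model_bpa(dataset=dataset, workspace=workspace)
labs.vertipaq_analyzer(dataset=dataset, workspace=workspace)

# Refresh once any recommended changes have been applied.
labs.refresh_semantic_model(dataset=dataset, workspace=workspace)
```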
diff --git a/notebooks/Tabular Object Model.ipynb b/notebooks/Tabular Object Model.ipynb index b95949d1..6cac10aa 100644 --- a/notebooks/Tabular Object Model.ipynb +++ b/notebooks/Tabular Object Model.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install semantic-link-labs"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs.tom import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. 
Check the [documentation](https://semantic-link-labs.readthedocs.io/en/0.5.0/sempy_labs.tom.html) to see all available parameters for each function."]},{"cell_type":"markdown","id":"6d46d878","metadata":{},"source":["#### Rename objects in the semantic model"]},{"cell_type":"code","execution_count":null,"id":"1284825a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," t.Name = t.Name.replace('_',' ')\n"]},{"cell_type":"code","execution_count":null,"id":"d3b60303","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," c.Name = c.Name.replace('_',' ')"]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name = t.Name, measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name = t.Name, measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name ='Product', column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," tom.add_data_column(table_name = 'Segment', column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n","\n"," tom.add_calculated_column(table_name = 'Internet Sales', column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name = t.Name, column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic 
model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name = 'Geography', hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name = t.Name, hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table = 'Internet Sales', from_column = 'ProductKey',\n"," to_table = 'Product', to_column = 'ProductKey', \n"," from_cardinality = 'Many', to_cardinality = 'One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = 'let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_entity_partition(table_name = table_name, entity_name = table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name = table_name, expression = \"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name = table_name, column_name = 'Color', source_column = \"'Product[Color]\", data_type = 'String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name = 'Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(role_name ='Reader', table_name = 'Product', 
filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name = r.Name, table_name = 'Product', filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name = 'Reader', table_name = 'Product', column_name = 'Size', permission = 'None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name = r.Name, table_name = t.Name, column_name = 'Size', permission = 'None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name = 'MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'MyCalcGroup':\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language = 'it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language = 'it-IT', property = 'Name', value = 
'Produtto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name = 'Parameter', objects = \"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove an object(s) from a semantic model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object = t.Columns['Size'])\n"," tom.remove_object(object = t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object = tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object = tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," 
tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(t.Name + ' : ' + str(rc))\n"," for c in t.Columns:\n"," col_size = tom.total_size(column = c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(full_name + ' : ' + h.Name)"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(full_name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(t.Name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install semantic-link-labs"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. 
This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs.tom import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. Check the [documentation](https://semantic-link-labs.readthedocs.io/en/0.5.0/sempy_labs.tom.html) to see all available parameters for each function."]},{"cell_type":"markdown","id":"6d46d878","metadata":{},"source":["#### Rename objects in the semantic model"]},{"cell_type":"code","execution_count":null,"id":"1284825a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," t.Name = t.Name.replace('_',' ')\n"]},{"cell_type":"code","execution_count":null,"id":"d3b60303","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," c.Name = c.Name.replace('_',' ')"]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name='Internet Sales', measure_name='Sales Amount', expression=\"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name='Internet Sales', measure_name='Order Quantity', expression=\"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name=t.Name, measure_name='Sales Amount', expression=\"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name=t.Name, measure_name='Order Quantity', expression=\"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name='Product', column_name='Size Range', source_column='SizeRange', data_type='Int64')\n"," tom.add_data_column(table_name= 'Segment', column_name='Summary Segment', source_column='SummarySegment', data_type='String')\n","\n"," tom.add_calculated_column(table_name='Internet Sales', column_name='GrossMargin', expression=\"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", 
data_type='Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name=t.Name, column_name='Size Range', source_column='SizeRange', data_type='Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name='Summary Segment', source_column='SummarySegment', data_type='String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name=t.Name, column_name='GrossMargin', expression=\"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type='Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name='Geography', hierarchy_name='Geo Hierarchy', levels=['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name=t.Name, hierarchy_name='Geo Hierarchy', levels=['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table='Internet Sales', from_column='ProductKey',\n"," to_table='Product', to_column ='ProductKey', \n"," from_cardinality='Many', to_cardinality='One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name='Sales'\n"," tom.add_table(name=table_name)\n"," tom.add_m_partition(table_name=table_name, partition_name=table_name, expression='let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name=table_name)\n"," tom.add_entity_partition(table_name=table_name, entity_name=table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name=table_name, expression=\"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name=table_name, column_name='Color', 
source_column=\"'Product[Color]\", data_type='String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name='Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(\n"," role_name='Reader', \n"," table_name='Product',\n"," filter_expression=\"'Dim Product'[Color] = \\\"Blue\\\"\"\n"," )"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name=r.Name, table_name='Product', filter_expression=\"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name='Reader', table_name='Product', column_name='Size', permission='None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name=r.Name, table_name=t.Name, column_name='Size', permission='None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name='MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name='MyCalcGroup', calculation_item_name='YTD', expression=\"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name='MyCalcGroup', calculation_item_name='MTD', expression=\"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'MyCalcGroup':\n"," tom.add_calculation_item(table_name=t.Name, calculation_item_name='YTD', expression=\"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name=t.Name, 
calculation_item_name='MTD', expression=\"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language='it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language='it-IT', property='Name', value='Produtto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name='Parameter', objects=\"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove an object(s) from a semantic model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object=t.Columns['Size'])\n"," tom.remove_object(object=t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object=tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object=tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," 
print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(f\"{t.Name} : {str(rc)}\")\n"," for c in t.Columns:\n"," col_size = tom.total_size(object=c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(f\"{full_name} : {h.Name}\")"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(f\"{full_name} : {rel_name}\")"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(f\"{t.Name} : {rel_name}\")"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py index 59541dd8..b6c67401 100644 --- a/src/sempy_labs/__init__.py +++ b/src/sempy_labs/__init__.py @@ -1,3 +1,38 @@ +from sempy_labs._ml_models import ( + list_ml_models, + create_ml_model, + 
delete_ml_model, +) +from sempy_labs._ml_experiments import ( + list_ml_experiments, + create_ml_experiment, + delete_ml_experiment, +) +from sempy_labs._warehouses import ( + create_warehouse, + list_warehouses, + delete_warehouse, +) +from sempy_labs._data_pipelines import ( + list_data_pipelines, + create_data_pipeline, + delete_data_pipeline, +) +from sempy_labs._eventhouses import ( + create_eventhouse, + list_eventhouses, + delete_eventhouse, +) +from sempy_labs._eventstreams import ( + list_eventstreams, + create_eventstream, + delete_eventstream, +) +from sempy_labs._kql_querysets import ( + list_kql_querysets, + create_kql_queryset, + delete_kql_queryset, +) from sempy_labs._kql_databases import ( list_kql_databases, create_kql_database, @@ -113,22 +148,15 @@ list_shortcuts, get_object_level_security, list_capacities, + # list_tables, # list_annotations, # list_columns, - list_dashboards, - # list_datamarts, - # list_datapipelines, - # list_eventstreams, + # list_relationships, # list_kpis, - # list_kqlquerysets, + list_dashboards, + list_datamarts, list_lakehouses, - # list_mlexperiments, - # list_mlmodels, - # list_relationships, - # list_sqlendpoints, - # list_tables, - list_warehouses, - create_warehouse, + list_sql_endpoints, update_item, ) from sempy_labs._helper_functions import ( @@ -348,4 +376,27 @@ "list_kql_databases", "create_kql_database", "delete_kql_database", + "create_warehouse", + "list_warehouses", + "delete_warehouse", + "create_eventhouse", + "list_eventhouses", + "delete_eventhouse", + "list_data_pipelines", + "create_data_pipeline", + "delete_data_pipeline", + "list_eventstreams", + "create_eventstream", + "delete_eventstream", + "list_kql_querysets", + "create_kql_queryset", + "delete_kql_queryset", + "list_ml_models", + "create_ml_model", + "delete_ml_model", + "list_ml_experiments", + "create_ml_experiment", + "delete_ml_experiment", + "list_sql_endpoints", + "list_datamarts", ] diff --git a/src/sempy_labs/_ai.py b/src/sempy_labs/_ai.py index 43e40723..b26e4ad2 100644 --- a/src/sempy_labs/_ai.py +++ b/src/sempy_labs/_ai.py @@ -1,8 +1,6 @@ import sempy import sempy.fabric as fabric import pandas as pd -from synapse.ml.services.openai import OpenAICompletion -from pyspark.sql.functions import col from pyspark.sql import SparkSession from typing import List, Optional, Union from IPython.display import display diff --git a/src/sempy_labs/_data_pipelines.py b/src/sempy_labs/_data_pipelines.py new file mode 100644 index 00000000..245cd316 --- /dev/null +++ b/src/sempy_labs/_data_pipelines.py @@ -0,0 +1,118 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_data_pipelines(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the data pipelines within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the data pipelines within a workspace. 
+ """ + + df = pd.DataFrame(columns=["Data Pipeline Name", "Data Pipeline ID", "Description"]) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/dataPipelines") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + new_data = { + "Data Pipeline Name": v.get("displayName"), + "Data Pipeline ID": v.get("id"), + "Description": v.get("description"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_data_pipeline( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric data pipeline. + + Parameters + ---------- + name: str + Name of the data pipeline. + description : str, default=None + A description of the environment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/dataPipelines", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' data pipeline has been created within the '{workspace}' workspace." + ) + + +def delete_data_pipeline(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric data pipeline. + + Parameters + ---------- + name: str + Name of the data pipeline. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="DataPipeline", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/dataPipelines/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' data pipeline within the '{workspace}' workspace has been deleted." 
+ ) diff --git a/src/sempy_labs/_documentation.py b/src/sempy_labs/_documentation.py new file mode 100644 index 00000000..2355f3f6 --- /dev/null +++ b/src/sempy_labs/_documentation.py @@ -0,0 +1,118 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from typing import List, Optional + + +def list_all_items(workspaces: Optional[str | List[str]] = None): + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Workspace Id", + "Item Name", + "Item Type", + "Description", + ] + ) + + if isinstance(workspaces, str): + workspaces = [workspaces] + + dfW = fabric.list_workspaces() + if workspaces is not None: + dfW = dfW[dfW["Name"].isin(workspaces)] + + for _, r in dfW.iterrows(): + workspace_name = r["Name"] + workspace_id = r["Id"] + dfI = fabric.list_items(workspace=workspace_name) + for _, r2 in dfI.iterrows(): + + new_data = { + "Workspace Name": workspace_name, + "Workspace Id": workspace_id, + "Item Name": r2["Name"], + "Item Type": r2["Type"], + "Description": r2["Description"], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def data_dictionary(dataset: str, workspace: Optional[str | None] = None): + + from sempy_labs.tom import connect_semantic_model + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + + df = pd.DataFrame(columns=['Workspace Name', 'Model Name', 'Table Name', 'Object Type', 'Object Name', 'Hidden Flag', 'Description', 'Display Folder', 'Measure Formula']) + + with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom: + for t in tom.model.Tables: + expr = None + if tom.is_calculated_table(table_name=t.Name): + pName = next(p.Name for p in t.Partitions) + expr = t.Partitions[pName].Source.Expression + + new_data = { + "Workspace Name": workspace, + "Model Name": dataset, + "Table Name": t.Name, + "Object Type": t.ObjectType, + "Object Name": t.Name, + "Hidden Flag": t.IsHidden, + "Description": t.Description, + "Display Folder": None, + "Measure Formula": expr, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for c in t.Columns: + if c.Type != TOM.ColumnType.RowNumber: + expr = None + if tom.is_calculated_column(table_name=t.Name, column_name=c.Name): + expr = c.Expression + + new_data = { + "Workspace Name": workspace, + "Model Name": dataset, + "Table Name": t.Name, + "Object Type": c.ObjectType, + "Object Name": c.Name, + "Hidden Flag": c.IsHidden, + "Description": c.Description, + "Display Folder": c.DisplayFolder, + "Measure Formula": expr, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for m in t.Measures: + new_data = { + "Workspace Name": workspace, + "Model Name": dataset, + "Table Name": t.Name, + "Object Type": m.ObjectType, + "Object Name": m.Name, + "Hidden Flag": m.IsHidden, + "Description": m.Description, + "Display Folder": m.DisplayFolder, + "Measure Formula": m.Expression, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + if t.CalculationGroup is not None: + for ci in t.CalculationGroup.CalculationItems: + new_data = { + "Workspace Name": workspace, + "Model Name": dataset, + "Table Name": t.Name, + "Object Type": 'Calculation Item', + "Object Name": ci.Name, + "Hidden Flag": t.IsHidden, + "Description": ci.Description, + "Display Folder": None, + "Measure Formula": ci.Expression, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df diff --git 
a/src/sempy_labs/_eventhouses.py b/src/sempy_labs/_eventhouses.py new file mode 100644 index 00000000..ab786aaf --- /dev/null +++ b/src/sempy_labs/_eventhouses.py @@ -0,0 +1,118 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def create_eventhouse( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric eventhouse. + + Parameters + ---------- + name: str + Name of the eventhouse. + description : str, default=None + A description of the environment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/eventhouses", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' eventhouse has been created within the '{workspace}' workspace." + ) + + +def list_eventhouses(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the eventhouses within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the eventhouses within a workspace. + """ + + df = pd.DataFrame(columns=["Eventhouse Name", "Eventhouse Id", "Description"]) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/eventhouses") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + new_data = { + "Eventhouse Name": v.get("displayName"), + "Eventhouse Id": v.get("id"), + "Description": v.get("description"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def delete_eventhouse(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric eventhouse. + + Parameters + ---------- + name: str + Name of the eventhouse. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="Eventhouse", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/eventhouses/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' eventhouse within the '{workspace}' workspace has been deleted." 
+ ) diff --git a/src/sempy_labs/_eventstreams.py b/src/sempy_labs/_eventstreams.py new file mode 100644 index 00000000..e56d55e2 --- /dev/null +++ b/src/sempy_labs/_eventstreams.py @@ -0,0 +1,118 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_eventstreams(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the eventstreams within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the eventstreams within a workspace. + """ + + df = pd.DataFrame(columns=["Eventstream Name", "Eventstream Id", "Description"]) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/eventstreams") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + new_data = { + "Eventstream Name": v.get("displayName"), + "Eventstream Id": v.get("id"), + "Description": v.get("description"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_eventstream( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric eventstream. + + Parameters + ---------- + name: str + Name of the eventstream. + description : str, default=None + A description of the environment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/eventstreams", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' eventstream has been created within the '{workspace}' workspace." + ) + + +def delete_eventstream(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric eventstream. + + Parameters + ---------- + name: str + Name of the eventstream. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="Eventstream", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/eventstreams/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' eventstream within the '{workspace}' workspace has been deleted." 
+ ) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 3f0dea9d..f345c4e5 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -1051,3 +1051,18 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> ) return dfE_filt["Environment Id"].iloc[0] + + +def resolve_notebook_id(notebook: str, workspace: Optional[str] = None) -> UUID: + + workspace = fabric.resolve_workspace_name(workspace) + + dfI = fabric.list_items(workspace=workspace, type="Notebook") + dfI_filt = dfI[dfI["Display Name"] == notebook] + + if len(dfI_filt) == 0: + raise ValueError( + f"{icons.red_dot} The '{notebook}' notebook does not exist within the '{workspace}' workspace." + ) + + return dfI_filt["Id"].iloc[0] diff --git a/src/sempy_labs/_icons.py b/src/sempy_labs/_icons.py index ac717ae9..b08d8528 100644 --- a/src/sempy_labs/_icons.py +++ b/src/sempy_labs/_icons.py @@ -101,3 +101,15 @@ "clearValues", "defragment", ] + +itemTypes = { + "DataPipeline": "dataPipelines", + "Eventstream": "eventstreams", + "KQLDatabase": "kqlDatabases", + "KQLQueryset": "kqlQuerysets", + "Lakehouse": "lakehouses", + "MLExperiment": "mlExperiments", + "MLModel": "mlModels", + "Notebook": "notebooks", + "Warehouse": "warehouses", +} diff --git a/src/sempy_labs/_kql_databases.py b/src/sempy_labs/_kql_databases.py index 3f5c74bb..3c0c54c7 100644 --- a/src/sempy_labs/_kql_databases.py +++ b/src/sempy_labs/_kql_databases.py @@ -93,7 +93,7 @@ def create_kql_database( client = fabric.FabricRestClient() response = client.post( - f"/v1/workspaces/{workspace_id}/environments", json=request_body + f"/v1/workspaces/{workspace_id}/kqlDatabases", json=request_body ) lro(client, response, status_codes=[201, 202]) @@ -118,12 +118,9 @@ def delete_kql_database(name: str, workspace: Optional[str] = None): """ (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - dfK = list_kql_databases(workspace=workspace) - dfK_filt = dfK[dfK['KQL Database Name'] == name] - - if len(dfK_filt) == 0: - raise ValueError(f"{icons.red_dot} The '{name}' KQL database does not exist within the '{workspace}' workspace.") - kql_database_id = dfK_filt['KQL Database Id'].iloc[0] + kql_database_id = fabric.resolve_item_id( + item_name=name, type="KQLDatabase", workspace=workspace + ) client = fabric.FabricRestClient() response = client.delete( diff --git a/src/sempy_labs/_kql_querysets.py b/src/sempy_labs/_kql_querysets.py new file mode 100644 index 00000000..5573c8d9 --- /dev/null +++ b/src/sempy_labs/_kql_querysets.py @@ -0,0 +1,124 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_kql_querysets(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the KQL querysets within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the KQL querysets within a workspace. 
+ """ + + df = pd.DataFrame( + columns=[ + "KQL Queryset Name", + "KQL Queryset Id", + "Description", + ] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/kqlQuerysets") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + new_data = { + "KQL Queryset Name": v.get("displayName"), + "KQL Queryset Id": v.get("id"), + "Description": v.get("description"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_kql_queryset( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a KQL queryset. + + Parameters + ---------- + name: str + Name of the KQL queryset. + description : str, default=None + A description of the environment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/kqlQuerysets", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' KQL queryset has been created within the '{workspace}' workspace." + ) + + +def delete_kql_queryset(name: str, workspace: Optional[str] = None): + """ + Deletes a KQL queryset. + + Parameters + ---------- + name: str + Name of the KQL queryset. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + kql_database_id = fabric.resolve_item_id( + item_name=name, type="KQLQueryset", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete( + f"/v1/workspaces/{workspace_id}/kqlQuerysets/{kql_database_id}" + ) + + if response.status_code != 200: + raise FabricHTTPException(response) + print( + f"{icons.green_dot} The '{name}' KQL queryset within the '{workspace}' workspace has been deleted." + ) diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py index 183a0d96..73ca577d 100644 --- a/src/sempy_labs/_list_functions.py +++ b/src/sempy_labs/_list_functions.py @@ -5,7 +5,6 @@ resolve_lakehouse_id, resolve_dataset_id, pagination, - lro, resolve_item_type, format_dax_object_name, ) @@ -706,9 +705,9 @@ def list_lakehouses(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: +def list_sql_endpoints(workspace: Optional[str] = None) -> pd.DataFrame: """ - Shows the warehouses within a workspace. + Shows the SQL endpoints within a workspace. Parameters ---------- @@ -720,64 +719,10 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: Returns ------- pandas.DataFrame - A pandas dataframe showing the warehouses within a workspace. + A pandas dataframe showing the SQL endpoints within a workspace. 
""" - df = pd.DataFrame( - columns=[ - "Warehouse Name", - "Warehouse ID", - "Description", - "Connection Info", - "Created Date", - "Last Updated Time", - ] - ) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/warehouses") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - prop = v.get("properties", {}) - - new_data = { - "Warehouse Name": v.get("displayName"), - "Warehouse ID": v.get("id"), - "Description": v.get("description"), - "Connection Info": prop.get("connectionInfo"), - "Created Date": prop.get("createdDate"), - "Last Updated Time": prop.get("lastUpdatedTime"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_sqlendpoints(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the SQL Endpoints within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the SQL Endpoints within a workspace. - """ - - df = pd.DataFrame(columns=["SQL Endpoint ID", "SQL Endpoint Name", "Description"]) + df = pd.DataFrame(columns=["SQL Endpoint Id", "SQL Endpoint Name", "Description"]) (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) @@ -792,7 +737,7 @@ def list_sqlendpoints(workspace: Optional[str] = None) -> pd.DataFrame: for v in r.get("value", []): new_data = { - "SQL Endpoint ID": v.get("id"), + "SQL Endpoint Id": v.get("id"), "SQL Endpoint Name": v.get("displayName"), "Description": v.get("description"), } @@ -801,211 +746,6 @@ def list_sqlendpoints(workspace: Optional[str] = None) -> pd.DataFrame: return df -def list_kql_querysets(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the KQL Querysets within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the KQL Querysets within a workspace. - """ - - df = pd.DataFrame(columns=["KQL Queryset Name", "KQL Queryset ID", "Description"]) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/kqlQuerysets") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - - new_data = { - "KQL Queryset Name": v.get("displayName"), - "KQL Queryset ID": v.get("id"), - "Description": v.get("description"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_ml_models(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the ML models within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the ML models within a workspace. - """ - - df = pd.DataFrame(columns=["ML Model Name", "ML Model ID", "Description"]) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/mlModels") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - model_id = v.get("id") - modelName = v.get("displayName") - desc = v.get("description") - - new_data = { - "ML Model Name": modelName, - "ML Model ID": model_id, - "Description": desc, - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_eventstreams(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the eventstreams within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the eventstreams within a workspace. - """ - - df = pd.DataFrame(columns=["Eventstream Name", "Eventstream ID", "Description"]) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/eventstreams") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - new_data = { - "Eventstream Name": v.get("displayName"), - "Eventstream ID": v.get("id"), - "Description": v.get("description"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_datapipelines(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the data pipelines within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the data pipelines within a workspace. - """ - - df = pd.DataFrame(columns=["Data Pipeline Name", "Data Pipeline ID", "Description"]) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/dataPipelines") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - new_data = { - "Data Pipeline Name": v.get("displayName"), - "Data Pipeline ID": v.get("id"), - "Description": v.get("description"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_ml_experiments(workspace: Optional[str] = None) -> pd.DataFrame: - """ - Shows the ML experiments within a workspace. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the ML experiments within a workspace. - """ - - df = pd.DataFrame(columns=["ML Experiment Name", "ML Experiment ID", "Description"]) - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/mlExperiments") - if response.status_code != 200: - raise FabricHTTPException(response) - - responses = pagination(client, response) - - for r in responses: - for v in r.get("value", []): - new_data = { - "ML Experiment Name": v.get("displayName"), - "ML Experiment ID": v.get("id"), - "Description": v.get("description"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - def list_datamarts(workspace: Optional[str] = None) -> pd.DataFrame: """ Shows the datamarts within a workspace. @@ -1046,43 +786,6 @@ def list_datamarts(workspace: Optional[str] = None) -> pd.DataFrame: return df -def create_warehouse( - warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None -): - """ - Creates a Fabric warehouse. - - Parameters - ---------- - warehouse: str - Name of the warehouse. - description : str, default=None - A description of the warehouse. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - """ - - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - request_body = {"displayName": warehouse} - - if description: - request_body["description"] = description - - client = fabric.FabricRestClient() - response = client.post( - f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body - ) - - lro(client, response, status_codes=[201, 202]) - - print( - f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace." - ) - - def update_item( item_type: str, current_name: str, @@ -1110,27 +813,14 @@ def update_item( """ (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - itemTypes = { - "DataPipeline": "dataPipelines", - "Eventstream": "eventstreams", - "KQLDatabase": "kqlDatabases", - "KQLQueryset": "kqlQuerysets", - "Lakehouse": "lakehouses", - "MLExperiment": "mlExperiments", - "MLModel": "mlModels", - "Notebook": "notebooks", - "Warehouse": "warehouses", - } - item_type = item_type.replace(" ", "").capitalize() - if item_type not in itemTypes.keys(): + if item_type not in icons.itemTypes.keys(): raise ValueError( f"{icons.red_dot} The '{item_type}' is not a valid item type. " ) - itemType = itemTypes[item_type] + itemType = icons.itemTypes[item_type] dfI = fabric.list_items(workspace=workspace, type=item_type) dfI_filt = dfI[(dfI["Display Name"] == current_name)] diff --git a/src/sempy_labs/_ml_experiments.py b/src/sempy_labs/_ml_experiments.py new file mode 100644 index 00000000..166f046d --- /dev/null +++ b/src/sempy_labs/_ml_experiments.py @@ -0,0 +1,122 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_ml_experiments(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the ML experiments within a workspace. 
+ + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the ML experiments within a workspace. + """ + + df = pd.DataFrame(columns=["ML Experiment Name", "ML Experiment Id", "Description"]) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/mlExperiments") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + experiment_id = v.get("id") + experiment_name = v.get("displayName") + desc = v.get("description") + + new_data = { + "ML Experiment Name": experiment_name, + "ML Experiment Id": experiment_id, + "Description": desc, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_ml_experiment( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric ML experiment. + + Parameters + ---------- + name: str + Name of the ML experiment. + description : str, default=None + A description of the ML experiment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/mlExperiments", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' ML experiment has been created within the '{workspace}' workspace." + ) + + +def delete_ml_experiment(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric ML experiment. + + Parameters + ---------- + name: str + Name of the ML experiment. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="MLExperiment", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/mlExperiments/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' ML experiment within the '{workspace}' workspace has been deleted."
+ ) diff --git a/src/sempy_labs/_ml_models.py b/src/sempy_labs/_ml_models.py new file mode 100644 index 00000000..fd3a3a05 --- /dev/null +++ b/src/sempy_labs/_ml_models.py @@ -0,0 +1,120 @@ +import sempy.fabric as fabric +import pandas as pd +import sempy_labs._icons as icons +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + lro, + pagination, +) +from sempy.fabric.exceptions import FabricHTTPException + + +def list_ml_models(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the ML models within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the ML models within a workspace. + """ + + df = pd.DataFrame(columns=["ML Model Name", "ML Model Id", "Description"]) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/mlModels") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + model_id = v.get("id") + modelName = v.get("displayName") + desc = v.get("description") + + new_data = { + "ML Model Name": modelName, + "ML Model Id": model_id, + "Description": desc, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def create_ml_model( + name: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric ML model. + + Parameters + ---------- + name: str + Name of the ML model. + description : str, default=None + A description of the ML model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": name} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post(f"/v1/workspaces/{workspace_id}/mlModels", json=request_body) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{name}' ML model has been created within the '{workspace}' workspace." + ) + + +def delete_ml_model(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric ML model. + + Parameters + ---------- + name: str + Name of the ML model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="MLModel", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/mlModels/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' ML model within the '{workspace}' workspace has been deleted."
+ ) diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index 1db199a7..a208dd14 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -13,10 +13,7 @@ resolve_dataset_id, get_language_codes, ) -from sempy_labs.lakehouse import ( - get_lakehouse_tables, - lakehouse_attached -) +from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached from sempy_labs.tom import connect_semantic_model from sempy_labs._model_bpa_rules import model_bpa_rules from typing import Optional diff --git a/src/sempy_labs/_notebooks.py b/src/sempy_labs/_notebooks.py index 20e10595..03d0b861 100644 --- a/src/sempy_labs/_notebooks.py +++ b/src/sempy_labs/_notebooks.py @@ -8,6 +8,7 @@ resolve_workspace_name_and_id, lro, _decode_b64, + resolve_notebook_id, ) from sempy.fabric.exceptions import FabricHTTPException @@ -37,16 +38,7 @@ def get_notebook_definition( """ (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - dfI = fabric.list_items(workspace=workspace, type="Notebook") - dfI_filt = dfI[dfI["Display Name"] == notebook_name] - - if len(dfI_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{notebook_name}' notebook does not exist within the '{workspace}' workspace." - ) - - notebook_id = dfI_filt["Id"].iloc[0] + notebook_id = resolve_notebook_id(notebook=notebook_name, workspace=workspace) client = fabric.FabricRestClient() response = client.post( f"v1/workspaces/{workspace_id}/notebooks/{notebook_id}/getDefinition", diff --git a/src/sempy_labs/_warehouses.py b/src/sempy_labs/_warehouses.py new file mode 100644 index 00000000..e4106909 --- /dev/null +++ b/src/sempy_labs/_warehouses.py @@ -0,0 +1,132 @@ +import sempy.fabric as fabric +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + pagination, + lro, +) +import pandas as pd +from typing import Optional +import sempy_labs._icons as icons +from sempy.fabric.exceptions import FabricHTTPException + + +def create_warehouse( + warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None +): + """ + Creates a Fabric warehouse. + + Parameters + ---------- + warehouse: str + Name of the warehouse. + description : str, default=None + A description of the warehouse. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + request_body = {"displayName": warehouse} + + if description: + request_body["description"] = description + + client = fabric.FabricRestClient() + response = client.post( + f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body + ) + + lro(client, response, status_codes=[201, 202]) + + print( + f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + ) + + +def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: + """ + Shows the warehouses within a workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the warehouses within a workspace. 
+ """ + + df = pd.DataFrame( + columns=[ + "Warehouse Name", + "Warehouse ID", + "Description", + "Connection Info", + "Created Date", + "Last Updated Time", + ] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + response = client.get(f"/v1/workspaces/{workspace_id}/warehouses") + if response.status_code != 200: + raise FabricHTTPException(response) + + responses = pagination(client, response) + + for r in responses: + for v in r.get("value", []): + prop = v.get("properties", {}) + + new_data = { + "Warehouse Name": v.get("displayName"), + "Warehouse ID": v.get("id"), + "Description": v.get("description"), + "Connection Info": prop.get("connectionInfo"), + "Created Date": prop.get("createdDate"), + "Last Updated Time": prop.get("lastUpdatedTime"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def delete_warehouse(name: str, workspace: Optional[str] = None): + """ + Deletes a Fabric warehouse. + + Parameters + ---------- + name: str + Name of the warehouse. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + item_id = fabric.resolve_item_id( + item_name=name, type="Warehouse", workspace=workspace + ) + + client = fabric.FabricRestClient() + response = client.delete(f"/v1/workspaces/{workspace_id}/warehouses/{item_id}") + + if response.status_code != 200: + raise FabricHTTPException(response) + + print( + f"{icons.green_dot} The '{name}' warehouse within the '{workspace}' workspace has been deleted." + ) diff --git a/src/sempy_labs/lakehouse/_lakehouse.py b/src/sempy_labs/lakehouse/_lakehouse.py index 5dc9bb93..26f1b976 100644 --- a/src/sempy_labs/lakehouse/_lakehouse.py +++ b/src/sempy_labs/lakehouse/_lakehouse.py @@ -15,6 +15,7 @@ def lakehouse_attached() -> bool: Returns True if a lakehouse is attached to the notebook. """ from pyspark.sql import SparkSession + spark = SparkSession.builder.getOrCreate() lakeId = spark.conf.get("trident.lakehouse.id") diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py index 15b97e15..10e107ec 100644 --- a/src/sempy_labs/tom/_model.py +++ b/src/sempy_labs/tom/_model.py @@ -4160,6 +4160,28 @@ def remove_sort_by_column(self, table_name: str, column_name: str): self.model.Tables[table_name].Columns[column_name].SortByColumn = None + def is_calculated_column(self, table_name: str, column_name: str): + """ + Identifies if a column is a calculated column. + + Parameters + ---------- + table_name : str + Name of the table in which the column resides. + column_name : str + Name of the column. + + Returns + ------- + bool + A boolean value indicating whether the column is a calculated column. + """ + + import Microsoft.AnalysisServices.Tabular as TOM + + c = self.model.Tables[table_name].Columns[column_name] + return c.Type == TOM.ColumnType.Calculated + def is_calculated_table(self, table_name: str): """ Identifies if a table is a calculated table. 
From cb8cb34d60de77514f84651494b298d6225bb324 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 29 Sep 2024 12:10:18 +0200 Subject: [PATCH 06/12] fixed issue with tom - lineagetag, sourcelineagetag, schemaname --- src/sempy_labs/_documentation.py | 33 ++++++++++--- src/sempy_labs/_helper_functions.py | 6 +++ src/sempy_labs/directlake/_dl_helper.py | 5 +- .../_migrate_calctables_to_lakehouse.py | 5 ++ src/sempy_labs/tom/_model.py | 49 +++++++++++++++++-- 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/sempy_labs/_documentation.py b/src/sempy_labs/_documentation.py index 2355f3f6..e5ad8fd2 100644 --- a/src/sempy_labs/_documentation.py +++ b/src/sempy_labs/_documentation.py @@ -44,12 +44,27 @@ def list_all_items(workspaces: Optional[str | List[str]] = None): def data_dictionary(dataset: str, workspace: Optional[str | None] = None): from sempy_labs.tom import connect_semantic_model + sempy.fabric._client._utils._init_analysis_services() import Microsoft.AnalysisServices.Tabular as TOM - df = pd.DataFrame(columns=['Workspace Name', 'Model Name', 'Table Name', 'Object Type', 'Object Name', 'Hidden Flag', 'Description', 'Display Folder', 'Measure Formula']) + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Model Name", + "Table Name", + "Object Type", + "Object Name", + "Hidden Flag", + "Description", + "Display Folder", + "Measure Formula", + ] + ) - with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom: + with connect_semantic_model( + dataset=dataset, readonly=True, workspace=workspace + ) as tom: for t in tom.model.Tables: expr = None if tom.is_calculated_table(table_name=t.Name): @@ -85,7 +100,9 @@ def data_dictionary(dataset: str, workspace: Optional[str | None] = None): "Display Folder": c.DisplayFolder, "Measure Formula": expr, } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for m in t.Measures: new_data = { "Workspace Name": workspace, @@ -98,7 +115,9 @@ def data_dictionary(dataset: str, workspace: Optional[str | None] = None): "Display Folder": m.DisplayFolder, "Measure Formula": m.Expression, } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) if t.CalculationGroup is not None: for ci in t.CalculationGroup.CalculationItems: @@ -106,13 +125,15 @@ def data_dictionary(dataset: str, workspace: Optional[str | None] = None): "Workspace Name": workspace, "Model Name": dataset, "Table Name": t.Name, - "Object Type": 'Calculation Item', + "Object Type": "Calculation Item", "Object Name": ci.Name, "Hidden Flag": t.IsHidden, "Description": ci.Description, "Display Folder": None, "Measure Formula": ci.Expression, } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index f345c4e5..41d89e70 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -3,6 +3,7 @@ import json import base64 import time +import uuid from sempy.fabric.exceptions import FabricHTTPException import pandas as pd from functools import wraps @@ -1066,3 +1067,8 @@ def resolve_notebook_id(notebook: str, workspace: Optional[str] = None) -> UUID: ) return dfI_filt["Id"].iloc[0] + + +def generate_guid(): + + return 
str(uuid.uuid4()) diff --git a/src/sempy_labs/directlake/_dl_helper.py b/src/sempy_labs/directlake/_dl_helper.py index 7a30837e..2ba75a1c 100644 --- a/src/sempy_labs/directlake/_dl_helper.py +++ b/src/sempy_labs/directlake/_dl_helper.py @@ -73,6 +73,7 @@ def generate_direct_lake_semantic_model( workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None, + schema: str = "dbo", overwrite: bool = False, refresh: bool = True, ): @@ -96,6 +97,8 @@ def generate_direct_lake_semantic_model( The Fabric workspace in which the lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + schema : str, default="dbo" + The schema used for the lakehouse. overwrite : bool, default=False If set to True, overwrites the existing semantic model if it already exists. refresh: bool, default=True @@ -158,7 +161,7 @@ def dyn_connect(): for t in lakehouse_tables: tom.add_table(name=t) - tom.add_entity_partition(table_name=t, entity_name=t) + tom.add_entity_partition(table_name=t, entity_name=t, schema_name=schema) dfLC_filt = dfLC[dfLC["Table Name"] == t] for i, r in dfLC_filt.iterrows(): lakeCName = r["Column Name"] diff --git a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py index 0c73f621..e8fe6bcc 100644 --- a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py +++ b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -8,6 +8,7 @@ resolve_lakehouse_id, create_abfss_path, retry, + generate_guid, ) from sempy_labs.tom import connect_semantic_model from pyspark.sql import SparkSession @@ -343,6 +344,8 @@ def dyn_connect(): tbl = TOM.Table() tbl.Name = tName + tbl.LineageTag = generate_guid() + tbl.SourceLineageTag = generate_guid() tbl.Partitions.Add(par) columns = ["Value1", "Value2", "Value3"] @@ -352,6 +355,8 @@ def dyn_connect(): col.Name = colName col.SourceColumn = "[" + colName + "]" col.DataType = TOM.DataType.String + col.LineageTag = generate_guid() + col.SourceLineageTag = generate_guid() tbl.Columns.Add(col) diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py index 10e107ec..f1f42c07 100644 --- a/src/sempy_labs/tom/_model.py +++ b/src/sempy_labs/tom/_model.py @@ -3,7 +3,10 @@ import pandas as pd import re from datetime import datetime -from sempy_labs._helper_functions import format_dax_object_name +from sempy_labs._helper_functions import ( + format_dax_object_name, + generate_guid, +) from sempy_labs._list_functions import list_relationships from sempy_labs._refresh_semantic_model import refresh_semantic_model from sempy_labs.directlake._dl_helper import check_fallback_reason @@ -283,8 +286,12 @@ def add_measure( obj.FormatStringDefinition = fsd if lineage_tag is not None: obj.LineageTag = lineage_tag + else: + obj.LineageTag = generate_guid() if source_lineage_tag is not None: obj.SourceLineageTag = source_lineage_tag + else: + obj.SourceLineageTag = generate_guid() if detail_rows_expression is not None: drd = TOM.DetailRowsDefinition() drd.Expression = detail_rows_expression @@ -376,8 +383,12 @@ def add_calculated_table_column( obj.DataCategory = data_category if lineage_tag is not None: obj.LineageTag = lineage_tag + else: + obj.LineageTag = generate_guid() if source_lineage_tag is not None: obj.SourceLineageTag = source_lineage_tag + else: + obj.SourceLineageTag = generate_guid() self.model.Tables[table_name].Columns.Add(obj) 
def add_data_column( @@ -462,8 +473,12 @@ def add_data_column( obj.DataCategory = data_category if lineage_tag is not None: obj.LineageTag = lineage_tag + else: + obj.LineageTag = generate_guid() if source_lineage_tag is not None: obj.SourceLineageTag = source_lineage_tag + else: + obj.SourceLineageTag = generate_guid() self.model.Tables[table_name].Columns.Add(obj) def add_calculated_column( @@ -548,8 +563,12 @@ def add_calculated_column( obj.DataCategory = data_category if lineage_tag is not None: obj.LineageTag = lineage_tag + else: + obj.LineageTag = generate_guid() if source_lineage_tag is not None: obj.SourceLineageTag = source_lineage_tag + else: + obj.SourceLineageTag = generate_guid() self.model.Tables[table_name].Columns.Add(obj) def add_calculation_item( @@ -761,8 +780,12 @@ def add_hierarchy( obj.Description = hierarchy_description if lineage_tag is not None: obj.LineageTag = lineage_tag + else: + obj.LineageTag = generate_guid() if source_lineage_tag is not None: obj.SourceLineageTag = source_lineage_tag + else: + obj.SourceLineageTag = generate_guid() self.model.Tables[table_name].Hierarchies.Add(obj) for col in columns: @@ -770,6 +793,8 @@ def add_hierarchy( lvl.Column = self.model.Tables[table_name].Columns[col] lvl.Name = levels[columns.index(col)] lvl.Ordinal = columns.index(col) + lvl.LineageTag = generate_guid() + lvl.SourceLineageTag = generate_guid() self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add(lvl) def add_relationship( @@ -939,8 +964,12 @@ def add_expression( exp.Description = description if lineage_tag is not None: exp.LineageTag = lineage_tag + else: + exp.LineageTag = generate_guid() if source_lineage_tag is not None: exp.SourceLineageTag = source_lineage_tag + else: + exp.SourceLineageTag = generate_guid() exp.Kind = TOM.ExpressionKind.M exp.Expression = expression @@ -1034,7 +1063,7 @@ def add_entity_partition( entity_name: str, expression: Optional[str] = None, description: Optional[str] = None, - schema_name: Optional[str] = None, + schema_name: str = "dbo", ): """ Adds an entity partition to a table within a semantic model. @@ -1050,7 +1079,7 @@ def add_entity_partition( Defaults to None which resolves to the 'DatabaseQuery' expression. description : str, default=None A description for the partition. - schema_name : str, default=None + schema_name : str, default="dbo" The schema name.
""" import Microsoft.AnalysisServices.Tabular as TOM @@ -1062,8 +1091,7 @@ def add_entity_partition( ep.ExpressionSource = self.model.Expressions["DatabaseQuery"] else: ep.ExpressionSource = self.model.Expressions[expression] - if schema_name is not None: - ep.SchemaName = schema_name + ep.SchemaName = schema_name p = TOM.Partition() p.Name = table_name p.Source = ep @@ -1072,6 +1100,9 @@ def add_entity_partition( p.Description = description self.model.Tables[table_name].Partitions.Add(p) + self.model.Tables[table_name].SourceLineageTag = ( + f"[{schema_name}].[{entity_name}]" + ) def set_alternate_of( self, @@ -2618,8 +2649,12 @@ def add_table( t.DataCategory = data_category if lineage_tag is not None: t.LineageTag = lineage_tag + else: + t.LineageTag = generate_guid() if source_lineage_tag is not None: t.SourceLineageTag = source_lineage_tag + else: + t.SourceLineagetTag = generate_guid() t.Hidden = hidden self.model.Tables.Add(t) @@ -2670,8 +2705,12 @@ def add_calculated_table( t.DataCategory = data_category if lineage_tag is not None: t.LineageTag = lineage_tag + else: + t.LineageTag = generate_guid() if source_lineage_tag is not None: t.SourceLineageTag = source_lineage_tag + else: + t.SourceLineagetTag = generate_guid() t.Hidden = hidden t.Partitions.Add(par) self.model.Tables.Add(t) From c2e4d9bdd29853d4c84d6699bd58f55b6ef4ae7f Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 29 Sep 2024 12:28:22 +0200 Subject: [PATCH 07/12] updated 2 functions --- src/sempy_labs/_environments.py | 8 ++++---- src/sempy_labs/_warehouses.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/sempy_labs/_environments.py b/src/sempy_labs/_environments.py index b7d2faf5..a6410356 100644 --- a/src/sempy_labs/_environments.py +++ b/src/sempy_labs/_environments.py @@ -11,14 +11,14 @@ def create_environment( - environment: str, description: Optional[str] = None, workspace: Optional[str] = None + name: str, description: Optional[str] = None, workspace: Optional[str] = None ): """ Creates a Fabric environment. Parameters ---------- - environment: str + name: str Name of the environment. description : str, default=None A description of the environment. @@ -30,7 +30,7 @@ def create_environment( (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - request_body = {"displayName": environment} + request_body = {"displayName": name} if description: request_body["description"] = description @@ -43,7 +43,7 @@ def create_environment( lro(client, response, status_codes=[201, 202]) print( - f"{icons.green_dot} The '{environment}' environment has been created within the '{workspace}' workspace." + f"{icons.green_dot} The '{name}' environment has been created within the '{workspace}' workspace." ) diff --git a/src/sempy_labs/_warehouses.py b/src/sempy_labs/_warehouses.py index e4106909..de55e5d8 100644 --- a/src/sempy_labs/_warehouses.py +++ b/src/sempy_labs/_warehouses.py @@ -11,14 +11,14 @@ def create_warehouse( - warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None + name: str, description: Optional[str] = None, workspace: Optional[str] = None ): """ Creates a Fabric warehouse. Parameters ---------- - warehouse: str + name: str Name of the warehouse. description : str, default=None A description of the warehouse. 
@@ -30,7 +30,7 @@ def create_warehouse( (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - request_body = {"displayName": warehouse} + request_body = {"displayName": name} if description: request_body["description"] = description @@ -43,7 +43,7 @@ def create_warehouse( lro(client, response, status_codes=[201, 202]) print( - f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + f"{icons.green_dot} The '{name}' warehouse has been created within the '{workspace}' workspace." ) @@ -67,7 +67,7 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: df = pd.DataFrame( columns=[ "Warehouse Name", - "Warehouse ID", + "Warehouse Id", "Description", "Connection Info", "Created Date", @@ -90,7 +90,7 @@ def list_warehouses(workspace: Optional[str] = None) -> pd.DataFrame: new_data = { "Warehouse Name": v.get("displayName"), - "Warehouse ID": v.get("id"), + "Warehouse Id": v.get("id"), "Description": v.get("description"), "Connection Info": prop.get("connectionInfo"), "Created Date": prop.get("createdDate"), From b153c6c53fc2be296cb905196fe75c62d87b91e1 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 30 Sep 2024 13:36:08 +0200 Subject: [PATCH 08/12] fixed changes per comments --- docs/requirements.txt | 2 +- pyproject.toml | 4 +- src/sempy_labs/_documentation.py | 39 +++++----- src/sempy_labs/_environments.py | 8 +-- src/sempy_labs/_helper_functions.py | 108 ++++++++++++++++++---------- src/sempy_labs/_model_bpa.py | 7 +- src/sempy_labs/_model_bpa_bulk.py | 13 ++-- src/sempy_labs/_vertipaq.py | 7 +- src/sempy_labs/_warehouses.py | 8 +-- 9 files changed, 114 insertions(+), 82 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index bf5422a9..6e5ae460 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -12,4 +12,4 @@ IPython polib azure.mgmt.resource jsonpath_ng -duckdb \ No newline at end of file +deltalake \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 04dbb94b..a60d7787 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "polib", "azure.mgmt.resource", "jsonpath_ng", - "duckdb", + "deltalake", ] [tool.setuptools.packages.find] @@ -47,7 +47,7 @@ test = [ Repository = "https://github.com/microsoft/semantic-link-labs.git" [[tool.mypy.overrides]] -module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*,duckdb.*" +module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*,polib.*,azure.mgmt.resource.*,jsonpath_ng.*,deltalake.*" ignore_missing_imports = true [tool.flake8] diff --git a/src/sempy_labs/_documentation.py b/src/sempy_labs/_documentation.py index e5ad8fd2..5d957489 100644 --- a/src/sempy_labs/_documentation.py +++ b/src/sempy_labs/_documentation.py @@ -83,26 +83,31 @@ def data_dictionary(dataset: str, workspace: Optional[str | None] = None): "Measure Formula": expr, } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for c in t.Columns: - if c.Type != TOM.ColumnType.RowNumber: + cols = [c for c in t.Columns if c.Type != TOM.ColumnType.RowNumber] + for c in cols: + + def get_calc_column_expression(table_name, column_name): expr = None - if tom.is_calculated_column(table_name=t.Name, column_name=c.Name): + if tom.is_calculated_column( + table_name=table_name, column_name=column_name + ): expr = c.Expression + return expr - new_data = { - "Workspace Name": workspace, - "Model Name": 
dataset, - "Table Name": t.Name, - "Object Type": c.ObjectType, - "Object Name": c.Name, - "Hidden Flag": c.IsHidden, - "Description": c.Description, - "Display Folder": c.DisplayFolder, - "Measure Formula": expr, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + new_data = { + "Workspace Name": workspace, + "Model Name": dataset, + "Table Name": t.Name, + "Object Type": c.ObjectType, + "Object Name": c.Name, + "Hidden Flag": c.IsHidden, + "Description": c.Description, + "Display Folder": c.DisplayFolder, + "Measure Formula": get_calc_column_expression(t.Name, c.Name), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for m in t.Measures: new_data = { "Workspace Name": workspace, diff --git a/src/sempy_labs/_environments.py b/src/sempy_labs/_environments.py index a6410356..b7d2faf5 100644 --- a/src/sempy_labs/_environments.py +++ b/src/sempy_labs/_environments.py @@ -11,14 +11,14 @@ def create_environment( - name: str, description: Optional[str] = None, workspace: Optional[str] = None + environment: str, description: Optional[str] = None, workspace: Optional[str] = None ): """ Creates a Fabric environment. Parameters ---------- - name: str + environment: str Name of the environment. description : str, default=None A description of the environment. @@ -30,7 +30,7 @@ def create_environment( (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - request_body = {"displayName": name} + request_body = {"displayName": environment} if description: request_body["description"] = description @@ -43,7 +43,7 @@ def create_environment( lro(client, response, status_codes=[201, 202]) print( - f"{icons.green_dot} The '{name}' environment has been created within the '{workspace}' workspace." + f"{icons.green_dot} The '{environment}' environment has been created within the '{workspace}' workspace." ) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 41d89e70..397500e8 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -13,11 +13,12 @@ import sempy_labs._icons as icons import urllib.parse from azure.core.credentials import TokenCredential, AccessToken +import deltalake def create_abfss_path( lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str -): +) -> str: """ Creates an abfss path for a delta table in a Fabric lakehouse. @@ -39,7 +40,7 @@ def create_abfss_path( return f"abfss://{lakehouse_workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{delta_table_name}" -def format_dax_object_name(table: str, column: str): +def format_dax_object_name(table: str, column: str) -> str: """ Formats a table/column combination to the 'Table Name'[Column Name] format. @@ -61,7 +62,7 @@ def format_dax_object_name(table: str, column: str): def create_relationship_name( from_table: str, from_column: str, to_table: str, to_column: str -): +) -> str: """ Formats a relationship's table/columns into a fully qualified name. @@ -89,7 +90,7 @@ def create_relationship_name( ) -def resolve_report_id(report: str, workspace: Optional[str] = None): +def resolve_report_id(report: str, workspace: Optional[str] = None) -> UUID: """ Obtains the ID of the Power BI report. 
@@ -117,7 +118,7 @@ def resolve_report_id(report: str, workspace: Optional[str] = None): return obj -def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): +def resolve_report_name(report_id: UUID, workspace: Optional[str] = None) -> str: """ Obtains the name of the Power BI report. @@ -147,7 +148,7 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): return obj -def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): +def resolve_dataset_id(dataset: str, workspace: Optional[str] = None) -> UUID: """ Obtains the ID of the semantic model. @@ -177,7 +178,7 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): return obj -def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): +def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None) -> str: """ Obtains the name of the semantic model. @@ -209,7 +210,7 @@ def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): def resolve_lakehouse_name( lakehouse_id: Optional[UUID] = None, workspace: Optional[str] = None -): +) -> str: """ Obtains the name of the Fabric lakehouse. @@ -243,7 +244,7 @@ def resolve_lakehouse_name( return obj -def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): +def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None) -> UUID: """ Obtains the ID of the Fabric lakehouse. @@ -262,9 +263,7 @@ def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): The ID of the Fabric lakehouse. """ - if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) + workspace = fabric.resolve_workspace_name(workspace) obj = fabric.resolve_item_id( item_name=lakehouse, type="Lakehouse", workspace=workspace @@ -322,7 +321,7 @@ def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None) return sqlEndpointId -def generate_embedded_filter(filter: str): +def generate_embedded_filter(filter: str) -> str: """ Converts the filter expression to a filter expression which can be used by a Power BI embedded URL. @@ -923,6 +922,19 @@ def pagination(client, response): def resolve_deployment_pipeline_id(deployment_pipeline: str) -> UUID: + """ + Obtains the Id for a given deployment pipeline. + + Parameters + ---------- + deployment_pipeline : str + The deployment pipeline name + + Returns + ------- + UUID + The deployment pipeline Id. + """ from sempy_labs._deployment_pipelines import list_deployment_pipelines @@ -969,14 +981,23 @@ def get_adls_client(account_name): return service_client -def resolve_warehouse_id(warehouse: str, workspace: Optional[str]): +def resolve_warehouse_id(warehouse: str, workspace: Optional[str]) -> UUID: + """ + Obtains the Id for a given warehouse. - workspace = fabric.resolve_workspace_name(workspace) - warehouse_id = fabric.resolve_item_id( - item_name=warehouse, type="Warehouse", workspace=workspace - ) + Parameters + ---------- + warehouse : str + The warehouse name - return warehouse_id + Returns + ------- + UUID + The warehouse Id. 
+ """ + + workspace = fabric.resolve_workspace_name(workspace) + return fabric.resolve_item_id(item_name=warehouse, type='Warehouse', workspace=workspace) def get_language_codes(languages: str | List[str]): @@ -1025,6 +1046,7 @@ def get_azure_token_credentials( def convert_to_alphanumeric_lowercase(input_string): + # Removes non-alphanumeric characters cleaned_string = re.sub(r"[^a-zA-Z0-9]", "", input_string) cleaned_string = cleaned_string.lower() @@ -1039,36 +1061,46 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> ---------- environment: str Name of the environment. + + Returns + ------- + UUID + The environment Id. """ - from sempy_labs._environments import list_environments - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) + return fabric.resolve_item_id(item_name=environment, type='Environment', workspace=workspace) - dfE = list_environments(workspace=workspace) - dfE_filt = dfE[dfE["Environment Name"] == environment] - if len(dfE_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{environment}' environment does not exist within the '{workspace}' workspace." - ) - return dfE_filt["Environment Id"].iloc[0] +def resolve_notebook_id(notebook: str, workspace: Optional[str] = None) -> UUID: + """ + Obtains the notebook Id for a given notebook. + Parameters + ---------- + notebook: str + Name of the notebook. -def resolve_notebook_id(notebook: str, workspace: Optional[str] = None) -> UUID: + Returns + ------- + UUID + The notebook Id. + """ workspace = fabric.resolve_workspace_name(workspace) + return fabric.resolve_item_id(item_name=notebook, type='Notebook', workspace=workspace) - dfI = fabric.list_items(workspace=workspace, type="Notebook") - dfI_filt = dfI[dfI["Display Name"] == notebook] - if len(dfI_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{notebook}' notebook does not exist within the '{workspace}' workspace." 
- ) +def generate_guid(): + + return str(uuid.uuid4()) - return dfI_filt["Id"].iloc[0] +def get_max_run_id(table_name: str) -> int: -def generate_guid(): + table_path = f"/lakehouse/default/Tables/{table_name}/" + delta_table = deltalake.DeltaTable(table_path) + data = delta_table.to_pandas() + max_run_id = data["RunId"].max() - return str(uuid.uuid4()) + return max_run_id diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index a208dd14..3fb07dcd 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -12,6 +12,7 @@ resolve_workspace_capacity, resolve_dataset_id, get_language_codes, + get_max_run_id, ) from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached from sempy_labs.tom import connect_semantic_model @@ -22,7 +23,6 @@ from pyspark.sql.functions import col, flatten from pyspark.sql.types import StructType, StructField, StringType import os -import duckdb @log @@ -350,10 +350,7 @@ def translate_using_spark(rule_file): if len(lakeT_filt) == 0: runId = 1 else: - x = duckdb.sql( - f"""SELECT max(RunId) as max_run_id FROM delta_scan('/lakehouse/default/Tables/{delta_table_name}/') """ - ).fetchall() - max_run_id = x[0][0] + max_run_id = get_max_run_id(table_name=delta_table_name) runId = max_run_id + 1 now = datetime.datetime.now() diff --git a/src/sempy_labs/_model_bpa_bulk.py b/src/sempy_labs/_model_bpa_bulk.py index b2221da2..ad642ef6 100644 --- a/src/sempy_labs/_model_bpa_bulk.py +++ b/src/sempy_labs/_model_bpa_bulk.py @@ -1,14 +1,17 @@ import sempy.fabric as fabric import pandas as pd import datetime -from pyspark.sql import SparkSession from sempy_labs._helper_functions import ( resolve_lakehouse_name, save_as_delta_table, resolve_workspace_capacity, retry, + get_max_run_id, +) +from sempy_labs.lakehouse import ( + get_lakehouse_tables, + lakehouse_attached, ) -from sempy_labs.lakehouse import get_lakehouse_tables, lakehouse_attached from sempy_labs._model_bpa import run_model_bpa from typing import Optional, List from sempy._utils._log import log @@ -78,7 +81,6 @@ def run_model_bpa_bulk( ] now = datetime.datetime.now() output_table = "modelbparesults" - spark = SparkSession.builder.getOrCreate() lakehouse_workspace = fabric.resolve_workspace_name() lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name( @@ -90,9 +92,8 @@ def run_model_bpa_bulk( if len(lakeT_filt) == 0: runId = 1 else: - dfSpark = spark.table(f"`{lakehouse_id}`.{output_table}").select(F.max("RunId")) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 + max_run_id = get_max_run_id(table_name=output_table) + runId = max_run_id + 1 if isinstance(workspace, str): workspace = [workspace] diff --git a/src/sempy_labs/_vertipaq.py b/src/sempy_labs/_vertipaq.py index 245cec78..14ec7c0d 100644 --- a/src/sempy_labs/_vertipaq.py +++ b/src/sempy_labs/_vertipaq.py @@ -13,6 +13,7 @@ resolve_dataset_id, save_as_delta_table, resolve_workspace_capacity, + get_max_run_id, ) from sempy_labs._list_functions import list_relationships, list_tables from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables @@ -20,7 +21,6 @@ from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons -import duckdb @log @@ -499,10 +499,7 @@ def vertipaq_analyzer( if len(lakeT_filt) == 0: runId = 1 else: - x = duckdb.sql( - f"""SELECT max(RunId) as max_run_id FROM delta_scan('/lakehouse/default/Tables/{lakeTName}/') """ - ).fetchall() - max_run_id = x[0][0] + max_run_id = get_max_run_id(table_name=lakeTName) runId = 
max_run_id + 1 dfMap = { diff --git a/src/sempy_labs/_warehouses.py b/src/sempy_labs/_warehouses.py index de55e5d8..a309136d 100644 --- a/src/sempy_labs/_warehouses.py +++ b/src/sempy_labs/_warehouses.py @@ -11,14 +11,14 @@ def create_warehouse( - name: str, description: Optional[str] = None, workspace: Optional[str] = None + warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None ): """ Creates a Fabric warehouse. Parameters ---------- - name: str + warehouse: str Name of the warehouse. description : str, default=None A description of the warehouse. @@ -30,7 +30,7 @@ def create_warehouse( (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - request_body = {"displayName": name} + request_body = {"displayName": warehouse} if description: request_body["description"] = description @@ -43,7 +43,7 @@ def create_warehouse( lro(client, response, status_codes=[201, 202]) print( - f"{icons.green_dot} The '{name}' warehouse has been created within the '{workspace}' workspace." + f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace}' workspace." ) From ab32521d67b60271d874dbbfc4565aecca399149 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 30 Sep 2024 14:36:52 +0200 Subject: [PATCH 09/12] test --- src/sempy_labs/_helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 397500e8..2c853fed 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -1065,7 +1065,7 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> Returns ------- UUID - The environment Id. + The environment Id. """ workspace = fabric.resolve_workspace_name(workspace) From 778a2ae82d267707fcc68b2fd6785d7601a31e38 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 30 Sep 2024 14:53:59 +0200 Subject: [PATCH 10/12] test2 --- src/sempy_labs/_helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 2c853fed..397500e8 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -1065,7 +1065,7 @@ def resolve_environment_id(environment: str, workspace: Optional[str] = None) -> Returns ------- UUID - The environment Id. + The environment Id. """ workspace = fabric.resolve_workspace_name(workspace) From 91e93269baf98013b120892c24f9076716d384a5 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 30 Sep 2024 16:07:29 +0200 Subject: [PATCH 11/12] test3 --- src/sempy_labs/_helper_functions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 397500e8..181fa7b8 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -263,7 +263,9 @@ def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None) -> UUI The ID of the Fabric lakehouse. 
""" - workspace = fabric.resolve_workspace_name(workspace) + if workspace is None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) obj = fabric.resolve_item_id( item_name=lakehouse, type="Lakehouse", workspace=workspace From 6ed7b3cbf0c6f6b28948d1c27bb28921803b00ab Mon Sep 17 00:00:00 2001 From: Michael Date: Tue, 1 Oct 2024 12:45:53 +0200 Subject: [PATCH 12/12] fixed issue with model dependencies (warning) --- src/sempy_labs/_model_bpa.py | 3 ++- src/sempy_labs/_model_dependencies.py | 36 +++++++++++++-------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index 3fb07dcd..7ee75694 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -511,4 +511,5 @@ def translate_using_spark(rule_file): tab_html += "" # Display the tabs, tab contents, and run the script - return display(HTML(styles + tab_html + content_html + script)) + if not export: + return display(HTML(styles + tab_html + content_html + script)) diff --git a/src/sempy_labs/_model_dependencies.py b/src/sempy_labs/_model_dependencies.py index 7eee5276..2927c078 100644 --- a/src/sempy_labs/_model_dependencies.py +++ b/src/sempy_labs/_model_dependencies.py @@ -74,7 +74,7 @@ def get_measure_dependencies( for index, dependency in dep_filt.iterrows(): d = True - if dependency[5] == "Measure": + if dependency.iloc[5] == "Measure": d = False df = pd.concat( [ @@ -85,12 +85,12 @@ def get_measure_dependencies( "Table Name": r["Table Name"], "Object Name": r["Object Name"], "Object Type": r["Object Type"], - "Referenced Object": dependency[4], - "Referenced Table": dependency[3], - "Referenced Object Type": dependency[5], + "Referenced Object": dependency.iloc[4], + "Referenced Table": dependency.iloc[3], + "Referenced Object Type": dependency.iloc[5], "Done": d, "Full Object Name": r["Full Object Name"], - "Referenced Full Object Name": dependency[ + "Referenced Full Object Name": dependency.iloc[ 7 ], "Parent Node": rObj, @@ -110,12 +110,12 @@ def get_measure_dependencies( "Table Name": r["Table Name"], "Object Name": r["Object Name"], "Object Type": r["Object Type"], - "Referenced Object": dependency[4], - "Referenced Table": dependency[3], - "Referenced Object Type": dependency[5], + "Referenced Object": dependency.iloc[4], + "Referenced Table": dependency.iloc[3], + "Referenced Object Type": dependency.iloc[5], "Done": d, "Full Object Name": r["Full Object Name"], - "Referenced Full Object Name": dependency[ + "Referenced Full Object Name": dependency.iloc[ 7 ], "Parent Node": rObj, @@ -203,7 +203,7 @@ def get_model_calc_dependencies( for index, dependency in dep_filt.iterrows(): d = True - if dependency[5] in objs: + if dependency.iloc[5] in objs: d = False df = pd.concat( [ @@ -214,12 +214,12 @@ def get_model_calc_dependencies( "Table Name": r["Table Name"], "Object Name": r["Object Name"], "Object Type": r["Object Type"], - "Referenced Object": dependency[4], - "Referenced Table": dependency[3], - "Referenced Object Type": dependency[5], + "Referenced Object": dependency.iloc[4], + "Referenced Table": dependency.iloc[3], + "Referenced Object Type": dependency.iloc[5], "Done": d, "Full Object Name": r["Full Object Name"], - "Referenced Full Object Name": dependency[ + "Referenced Full Object Name": dependency.iloc[ 7 ], "Parent Node": rObj, @@ -239,12 +239,12 @@ def get_model_calc_dependencies( "Table Name": r["Table Name"], "Object Name": r["Object Name"], "Object Type": 
r["Object Type"], - "Referenced Object": dependency[5], - "Referenced Table": dependency[4], - "Referenced Object Type": dependency[6], + "Referenced Object": dependency.iloc[5], + "Referenced Table": dependency.iloc[4], + "Referenced Object Type": dependency.iloc[6], "Done": d, "Full Object Name": r["Full Object Name"], - "Referenced Full Object Name": dependency[ + "Referenced Full Object Name": dependency.iloc[ 7 ], "Parent Node": rObj,