From ecbd179324b8c5cd71afc6af1a9a2f7763f6ae3a Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 16:37:39 +0000
Subject: [PATCH 01/23] add missing import

---
 docs/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index ad8b4fa4..6706941e 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1 +1,2 @@
-semantic-link-sempy
\ No newline at end of file
+semantic-link-sempy
+sphinx_rtd_theme
\ No newline at end of file

From 9cd7b373cb3c8f833121e1b0b79ce3b7b1d18ea5 Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 17:50:24 +0000
Subject: [PATCH 02/23] add docs badge

---
 README.md | 2 ++
 docs/source/conf.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1a119284..dfc0df0a 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs)
+[![Read The Docs](https://readthedocs.org/projects//badge/?version=0.4.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/)
+
 This is a python library intended to be used in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). This library was originally intended to contain functions used for [migrating semantic models to Direct Lake mode](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration-1). However, it quickly became apparent that functions within such a library could support many other useful activities in the realm of semantic models, reports, lakehouses and really anything Fabric-related. As such, this library contains a variety of functions ranging from running [Vertipaq Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#vertipaq_analyzer) or the [Best Practice Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#run_model_bpa) against a semantic model to seeing if any [lakehouse tables hit Direct Lake guardrails](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#get_lakehouse_tables) or accessing the [Tabular Object Model](https://github.com/microsoft/semantic-link-labs/#tabular-object-model-tom) and more! Instructions for migrating import/DirectQuery semantic models to Direct Lake mode can be found [here](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration-1).
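The README paragraph above describes how the library is meant to be used from a Fabric notebook. For orientation only (the block below is not part of the patch series), a minimal usage sketch follows; the import paths, the clear_cache arguments, and the model name are assumptions based on signatures visible elsewhere in these patches.

# Illustrative usage sketch; names marked as placeholders or assumptions are not from the patches.
import sempy_labs
from sempy_labs.ModelBPA import run_model_bpa      # module imported this way in sempy_labs/AI.py
from sempy_labs.Vertipaq import vertipaq_analyzer  # module edited in PATCH 08 (import path assumed)

dataset = "MySemanticModel"  # placeholder semantic model name
workspace = None             # these functions resolve None to the notebook's workspace

# Vertipaq Analyzer statistics for the semantic model
vertipaq_analyzer(dataset=dataset, workspace=workspace)

# Best Practice Analyzer results as a dataframe (same call pattern used in AI.py)
bpa_results = run_model_bpa(dataset=dataset, workspace=workspace, return_dataframe=True)

# clear_cache is exported at package level as of PATCH 11 (arguments assumed)
sempy_labs.clear_cache(dataset=dataset, workspace=workspace)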
diff --git a/docs/source/conf.py b/docs/source/conf.py
index beb4d783..3d01adaf 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,7 +13,7 @@
 project = 'semantic-link-labs'
 copyright = '2024, Microsoft and community'
 author = 'Microsoft and community'
-release = '0.4.0'
+release = '0.4.1'
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

From 4e67d8e9e2c142f8f3dc3df9aa523c17f7a06562 Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 17:56:24 +0000
Subject: [PATCH 03/23] remove empty line

---
 docs/source/conf.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3d01adaf..564fac01 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -24,7 +24,6 @@
 exclude_patterns = []
-
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

From 2495b6f9a774c60dbdada2e4831419f6696d3e94 Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 18:00:24 +0000
Subject: [PATCH 04/23] automate api doc generation

---
 .readthedocs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 72b028e7..63fd2e1d 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -14,6 +14,8 @@ build:
 # nodejs: "19"
 # rust: "1.64"
 # golang: "1.19"
+ pre_build:
+ sphinx-apidoc -f -o docs/source sempy_labs/
 # Build documentation in the "docs/" directory with Sphinx
 sphinx:

From 84f89160c3c6f4e91e2558776b7673d255927313 Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 18:02:45 +0000
Subject: [PATCH 05/23] fix RTD build

---
 .readthedocs.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 63fd2e1d..7918a843 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -14,8 +14,9 @@ build:
 # nodejs: "19"
 # rust: "1.64"
 # golang: "1.19"
- pre_build:
- sphinx-apidoc -f -o docs/source sempy_labs/
+ jobs:
+ pre_build:
+ - sphinx-apidoc -f -o docs/source sempy_labs/
 # Build documentation in the "docs/" directory with Sphinx
 sphinx:

From d6996995f763eea6dcb12cf99f703fa049377618 Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 18:14:19 +0000
Subject: [PATCH 06/23] add missing package

---
 docs/requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index 6706941e..b743b3fc 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,3 @@
 semantic-link-sempy
-sphinx_rtd_theme
\ No newline at end of file
+sphinx_rtd_theme
+pandas==2.0.3
\ No newline at end of file

From 89f19299ee92c4a818838f92a352a80a8c7590da Mon Sep 17 00:00:00 2001
From: Markus Cozowicz
Date: Thu, 6 Jun 2024 18:32:11 +0000
Subject: [PATCH 07/23] exclude pyspark

---
 docs/source/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 564fac01..19d493e4 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -31,6 +31,6 @@
 html_static_path = ['_static']
 # List of packages we don't want to install in the environment
-autodoc_mock_imports = ['delta', 'synapse']
+autodoc_mock_imports = ['delta', 'synapse', 'pyspark']
 napoleon_numpy_docstring = True
\ No newline at end of file

From 0897e9e12171872f410c09045180022cc6352178 Mon Sep 17 00:00:00 2001 From: Markus
Cozowicz Date: Thu, 6 Jun 2024 18:33:12 +0000 Subject: [PATCH 08/23] fix escaping --- sempy_labs/Vertipaq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sempy_labs/Vertipaq.py b/sempy_labs/Vertipaq.py index f88e08b3..ca50a740 100644 --- a/sempy_labs/Vertipaq.py +++ b/sempy_labs/Vertipaq.py @@ -397,7 +397,7 @@ def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Opt filePath = os.path.join(subFolderPath, fileName) + ext if os.path.exists(filePath): os.remove(filePath) - print(f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer\{zipFileName}' in the default lakehouse attached to this notebook.") + print(f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook.") def visualize_vertipaq(dataframes): From 94433818197b26a51f6c45e56fdcf0da5a92a8e9 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Thu, 6 Jun 2024 19:33:04 +0000 Subject: [PATCH 09/23] add pyspark dependency --- docs/requirements.txt | 3 ++- docs/source/conf.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b743b3fc..8d08db7b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ semantic-link-sempy sphinx_rtd_theme -pandas==2.0.3 \ No newline at end of file +pandas==2.0.3 +pyspark==3.5.0 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 19d493e4..564fac01 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,6 +31,6 @@ html_static_path = ['_static'] # List of packages we don't want to install in the environment -autodoc_mock_imports = ['delta', 'synapse', 'pyspark'] +autodoc_mock_imports = ['delta', 'synapse'] napoleon_numpy_docstring = True \ No newline at end of file From 1f04954c84a76b99febde2770502fa7f525aee43 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 7 Jun 2024 12:56:42 +0000 Subject: [PATCH 10/23] fix packages --- docs/requirements.txt | 2 +- docs/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 8d08db7b..b7fcb1f9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ semantic-link-sempy sphinx_rtd_theme pandas==2.0.3 -pyspark==3.5.0 \ No newline at end of file +# pyspark==3.5.0 \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 564fac01..903744fb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,6 +31,6 @@ html_static_path = ['_static'] # List of packages we don't want to install in the environment -autodoc_mock_imports = ['delta', 'synapse'] +autodoc_mock_imports = ['delta', 'synapse', 'jwt', 'semantic-link-sempy', 'pyspark'] napoleon_numpy_docstring = True \ No newline at end of file From b53773a165e1691397ae11e78afef128ed6ae7f5 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 7 Jun 2024 12:57:42 +0000 Subject: [PATCH 11/23] hide module --- sempy_labs/__init__.py | 1 + sempy_labs/{ClearCache.py => _clear_cache.py} | 0 2 files changed, 1 insertion(+) rename sempy_labs/{ClearCache.py => _clear_cache.py} (100%) diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py index e69de29b..bc1d8850 100644 --- a/sempy_labs/__init__.py +++ b/sempy_labs/__init__.py @@ -0,0 +1 @@ +from sempy_labs._clear_cache import clear_cache 
as clear_cache \ No newline at end of file diff --git a/sempy_labs/ClearCache.py b/sempy_labs/_clear_cache.py similarity index 100% rename from sempy_labs/ClearCache.py rename to sempy_labs/_clear_cache.py From 7e42396dd134a80c0a35df25767c0cc69a0a1066 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Fri, 7 Jun 2024 13:35:22 +0000 Subject: [PATCH 12/23] more packages --- docs/requirements.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b7fcb1f9..e42cbf45 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,8 @@ semantic-link-sempy sphinx_rtd_theme pandas==2.0.3 -# pyspark==3.5.0 \ No newline at end of file +# pyspark==3.5.0 +azure-identity==1.7.1 +azure-keyvault-secrets +azure-storage-file-datalake==12.3.1 +azure-storage-blob>=12.9.0 \ No newline at end of file From f6a7f309f69c2d3429ef2561940cba3415a33db3 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Sat, 8 Jun 2024 09:40:27 +0000 Subject: [PATCH 13/23] cleanup for docs --- docs/requirements.txt | 4 +- sempy_labs/AI.py | 409 ++-- sempy_labs/Connections.py | 212 +- sempy_labs/CreatePQTFile.py | 191 -- sempy_labs/DirectLakeSchemaCompare.py | 87 - sempy_labs/GenerateReport.py | 255 --- sempy_labs/GetLakehouseTables.py | 159 -- sempy_labs/GetMeasureDependencies.py | 203 +- sempy_labs/GetSemanticModelBim.py | 59 +- sempy_labs/Guardrails.py | 37 +- sempy_labs/LogAnalytics.py | 46 +- sempy_labs/MeasureDependencyTree.py | 39 +- sempy_labs/MigrateCalcTablesToLakehouse.py | 311 --- .../MigrateCalcTablesToSemanticModel.py | 123 -- .../MigrateModelObjectsToSemanticModel.py | 324 --- .../MigrateTablesColumnsToSemanticModel.py | 135 -- sempy_labs/MigrationValidation.py | 133 -- sempy_labs/ModelAutoBuild.py | 150 +- sempy_labs/ModelBPA.py | 1599 +++++++++----- sempy_labs/OneLakeIntegration.py | 124 +- sempy_labs/QSO.py | 289 ++- sempy_labs/RefreshCalcTables.py | 107 +- sempy_labs/RefreshSemanticModel.py | 122 +- sempy_labs/ReportFunctions.py | 742 ------- .../ShowUnsupportedDirectLakeObjects.py | 68 - sempy_labs/TOM.py | 1926 +++++++++++------ sempy_labs/Translations.py | 385 +++- sempy_labs/Vertipaq.py | 976 ++++++--- sempy_labs/WarmCache.py | 187 +- sempy_labs/__init__.py | 28 +- sempy_labs/_clear_cache.py | 22 +- ...del.py => _create_blank_semantic_model.py} | 40 +- sempy_labs/_create_pqt_file.py | 238 ++ sempy_labs/{Fallback.py => _fallback.py} | 45 +- ...icModel.py => _generate_semantic_model.py} | 105 +- ...elperFunctions.py => _helper_functions.py} | 243 ++- sempy_labs/_icons.py | 4 + .../{ListFunctions.py => _list_functions.py} | 925 +++++--- sempy_labs/directlake/__init__.py | 24 + .../directlake/_directlake_schema_compare.py | 108 + .../_directlake_schema_sync.py} | 97 +- .../_get_directlake_lakehouse.py} | 34 +- .../_get_shared_expression.py} | 39 +- .../_list_directlake_model_calc_tables.py} | 26 +- .../_show_unsupported_directlake_objects.py | 88 + ..._directlake_model_lakehouse_connection.py} | 53 +- .../_update_directlake_partition_entity.py} | 46 +- sempy_labs/lakehouse/__init__.py | 10 + .../_get_lakehouse_columns.py} | 48 +- sempy_labs/lakehouse/_get_lakehouse_tables.py | 248 +++ .../{Lakehouse.py => lakehouse/_lakehouse.py} | 49 +- sempy_labs/migration/__init__.py | 16 + .../_migrate_calctables_to_lakehouse.py | 433 ++++ .../_migrate_calctables_to_semantic_model.py | 153 ++ ...migrate_model_objects_to_semantic_model.py | 524 +++++ ...igrate_tables_columns_to_semantic_model.py | 169 ++ 
sempy_labs/migration/_migration_validation.py | 230 ++ sempy_labs/report/__init__.py | 15 + sempy_labs/report/_generate_report.py | 260 +++ sempy_labs/report/_report_functions.py | 869 ++++++++ .../_report_rebind.py} | 69 +- sempy_labs/shortcuts.py | 237 +- 62 files changed, 9307 insertions(+), 5590 deletions(-) delete mode 100644 sempy_labs/CreatePQTFile.py delete mode 100644 sempy_labs/DirectLakeSchemaCompare.py delete mode 100644 sempy_labs/GenerateReport.py delete mode 100644 sempy_labs/GetLakehouseTables.py delete mode 100644 sempy_labs/MigrateCalcTablesToLakehouse.py delete mode 100644 sempy_labs/MigrateCalcTablesToSemanticModel.py delete mode 100644 sempy_labs/MigrateModelObjectsToSemanticModel.py delete mode 100644 sempy_labs/MigrateTablesColumnsToSemanticModel.py delete mode 100644 sempy_labs/MigrationValidation.py delete mode 100644 sempy_labs/ReportFunctions.py delete mode 100644 sempy_labs/ShowUnsupportedDirectLakeObjects.py rename sempy_labs/{CreateBlankSemanticModel.py => _create_blank_semantic_model.py} (57%) create mode 100644 sempy_labs/_create_pqt_file.py rename sempy_labs/{Fallback.py => _fallback.py} (55%) rename sempy_labs/{GenerateSemanticModel.py => _generate_semantic_model.py} (56%) rename sempy_labs/{HelperFunctions.py => _helper_functions.py} (64%) create mode 100644 sempy_labs/_icons.py rename sempy_labs/{ListFunctions.py => _list_functions.py} (55%) create mode 100644 sempy_labs/directlake/__init__.py create mode 100644 sempy_labs/directlake/_directlake_schema_compare.py rename sempy_labs/{DirectLakeSchemaSync.py => directlake/_directlake_schema_sync.py} (50%) rename sempy_labs/{GetDirectLakeLakehouse.py => directlake/_get_directlake_lakehouse.py} (69%) rename sempy_labs/{GetSharedExpression.py => directlake/_get_shared_expression.py} (54%) rename sempy_labs/{ListDirectLakeModelCalcTables.py => directlake/_list_directlake_model_calc_tables.py} (68%) create mode 100644 sempy_labs/directlake/_show_unsupported_directlake_objects.py rename sempy_labs/{UpdateDirectLakeModelLakehouseConnection.py => directlake/_update_directlake_model_lakehouse_connection.py} (51%) rename sempy_labs/{UpdateDirectLakePartitionEntity.py => directlake/_update_directlake_partition_entity.py} (58%) create mode 100644 sempy_labs/lakehouse/__init__.py rename sempy_labs/{GetLakehouseColumns.py => lakehouse/_get_lakehouse_columns.py} (58%) create mode 100644 sempy_labs/lakehouse/_get_lakehouse_tables.py rename sempy_labs/{Lakehouse.py => lakehouse/_lakehouse.py} (67%) create mode 100644 sempy_labs/migration/__init__.py create mode 100644 sempy_labs/migration/_migrate_calctables_to_lakehouse.py create mode 100644 sempy_labs/migration/_migrate_calctables_to_semantic_model.py create mode 100644 sempy_labs/migration/_migrate_model_objects_to_semantic_model.py create mode 100644 sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py create mode 100644 sempy_labs/migration/_migration_validation.py create mode 100644 sempy_labs/report/__init__.py create mode 100644 sempy_labs/report/_generate_report.py create mode 100644 sempy_labs/report/_report_functions.py rename sempy_labs/{ReportRebind.py => report/_report_rebind.py} (67%) diff --git a/docs/requirements.txt b/docs/requirements.txt index e42cbf45..75a5603a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,4 +5,6 @@ pandas==2.0.3 azure-identity==1.7.1 azure-keyvault-secrets azure-storage-file-datalake==12.3.1 -azure-storage-blob>=12.9.0 \ No newline at end of file +azure-storage-blob>=12.9.0 +anytree +IPython \ No 
newline at end of file diff --git a/sempy_labs/AI.py b/sempy_labs/AI.py index e70aaa30..c6ecd9fd 100644 --- a/sempy_labs/AI.py +++ b/sempy_labs/AI.py @@ -6,66 +6,94 @@ from pyspark.sql import SparkSession from typing import List, Optional, Union + def optimize_semantic_model(dataset: str, workspace: Optional[str] = None): from .ModelBPA import run_model_bpa - from .Fallback import check_fallback_reason - from .HelperFunctions import format_dax_object_name - - modelBPA = run_model_bpa(dataset = dataset, workspace = workspace, return_dataframe = True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended = True) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC['Total Size'] = dfC['Total Size'].astype('int') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) + from ._fallback import check_fallback_reason + from ._helper_functions import format_dax_object_name - modelBPA_col = modelBPA[modelBPA['Object Type'] == 'Column'] - modelBPA_col = pd.merge(modelBPA_col, dfC[['Column Object', 'Total Size']], left_on = 'Object Name', right_on = 'Column Object', how = 'left') + modelBPA = run_model_bpa( + dataset=dataset, workspace=workspace, return_dataframe=True + ) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfC["Total Size"] = dfC["Total Size"].astype("int") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + + modelBPA_col = modelBPA[modelBPA["Object Type"] == "Column"] + modelBPA_col = pd.merge( + modelBPA_col, + dfC[["Column Object", "Total Size"]], + left_on="Object Name", + right_on="Column Object", + how="left", + ) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) if isDirectLake: - fallback = check_fallback_reason(dataset = dataset, workspace = workspace) - fallback_filt = fallback[fallback['FallbackReasonID']== 2] + fallback = check_fallback_reason(dataset=dataset, workspace=workspace) + fallback_filt = fallback[fallback["FallbackReasonID"] == 2] if len(fallback_filt) > 0: - print(f"The '{dataset}' semantic model is a Direct Lake semantic model which contains views. Since views always fall back to DirectQuery, it is recommended to only use lakehouse tables and not views.") + print( + f"The '{dataset}' semantic model is a Direct Lake semantic model which contains views. Since views always fall back to DirectQuery, it is recommended to only use lakehouse tables and not views." + ) # Potential model reduction estimate - ruleNames = ['Remove unnecessary columns','Set IsAvailableInMdx to false on non-attribute columns'] + ruleNames = [ + "Remove unnecessary columns", + "Set IsAvailableInMdx to false on non-attribute columns", + ] for rule in ruleNames: - df = modelBPA_col[modelBPA_col['Rule Name'] == rule] - df_filt = df[['Object Name', 'Total Size']].sort_values(by='Total Size', ascending=False) - totSize = df['Total Size'].sum() + df = modelBPA_col[modelBPA_col["Rule Name"] == rule] + df_filt = df[["Object Name", "Total Size"]].sort_values( + by="Total Size", ascending=False + ) + totSize = df["Total Size"].sum() if len(df_filt) > 0: - print(f"Potential savings of {totSize} bytes from following the '{rule}' rule.") + print( + f"Potential savings of {totSize} bytes from following the '{rule}' rule." 
+ ) display(df_filt) else: print(f"The '{rule}' rule has been followed.") -def generate_measure_descriptions(dataset: str, measures: Union[str,List[str]], gpt_model: Optional[str] = 'gpt-35-turbo', workspace: Optional[str] = None): +def generate_measure_descriptions( + dataset: str, + measures: Union[str, List[str]], + gpt_model: Optional[str] = "gpt-35-turbo", + workspace: Optional[str] = None, +): - service_name = 'synapseml-openai' + service_name = "synapseml-openai" if isinstance(measures, str): measures = [measures] - validModels = ['gpt-35-turbo', 'gpt-35-turbo-16k', 'gpt-4'] + validModels = ["gpt-35-turbo", "gpt-35-turbo-16k", "gpt-4"] if gpt_model not in validModels: - print(f"The '{gpt_model}' model is not a valid model. Enter a gpt_model from this list: {validModels}.") + print( + f"The '{gpt_model}' model is not a valid model. Enter a gpt_model from this list: {validModels}." + ) return - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) if measures is not None: - dfM_filt = dfM[dfM['Measure Name'].isin(measures)] + dfM_filt = dfM[dfM["Measure Name"].isin(measures)] else: dfM_filt = dfM - df = dfM_filt[['Table Name', 'Measure Name', 'Measure Expression']] + df = dfM_filt[["Table Name", "Measure Name", "Measure Expression"]] - df['prompt'] = f"The following is DAX code used by Microsoft Power BI. Please explain this code in simple terms:" +df['Measure Expression'] + df["prompt"] = ( + f"The following is DAX code used by Microsoft Power BI. Please explain this code in simple terms:" + + df["Measure Expression"] + ) # Generate new column in df dataframe which has the AI-generated descriptions completion = { @@ -73,43 +101,55 @@ def generate_measure_descriptions(dataset: str, measures: Union[str,List[str]], .setDeploymentName(gpt_model) .setMaxTokens(200) .setCustomServiceName(service_name) - .setPromptCol('prompt') - .setErrorCol('error') - .setOutputCol('completions') + .setPromptCol("prompt") + .setErrorCol("error") + .setOutputCol("completions") } completed_df = completion.transform(df).cache() completed_df.select( - col('prompt'), - col('error'), - col('completions.choices.text').getItem(0).alias('text'), + col("prompt"), + col("error"), + col("completions.choices.text").getItem(0).alias("text"), ) # Update the model to use the new descriptions tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - #for t in m.Tables: - #tName = t.Name - #for ms in t.Measures: - #mName = ms.Name - #mDesc = promptValue + # for t in m.Tables: + # tName = t.Name + # for ms in t.Measures: + # mName = ms.Name + # mDesc = promptValue + + # m.SaveChanges() - #m.SaveChanges() -def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], workspace: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def generate_aggs( + dataset: str, + table_name: str, + columns: Union[str, List[str]], + workspace: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + + from ._helper_functions import ( + get_direct_lake_sql_endpoint, + create_abfss_path, + format_dax_object_name, + resolve_lakehouse_id, + ) - from .HelperFunctions import get_direct_lake_sql_endpoint, create_abfss_path, format_dax_object_name, resolve_lakehouse_id - sempy.fabric._client._utils._init_analysis_services() import Microsoft.AnalysisServices.Tabular as TOM import System - #columns = { + # columns = { #'SalesAmount': 'Sum', 
#'ProductKey': 'GroupBy', #'OrderDateKey': 'GroupBy' - #} + # } if workspace == None: workspace_id = fabric.get_workspace_id() @@ -125,71 +165,87 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], columns = [columns] columnValues = columns.keys() - - aggTypes = ['Sum', 'Count', 'Min', 'Max', 'GroupBy'] - aggTypesAggregate = ['Sum', 'Count', 'Min', 'Max'] - numericTypes = ['Int64', 'Double', 'Decimal'] + + aggTypes = ["Sum", "Count", "Min", "Max", "GroupBy"] + aggTypesAggregate = ["Sum", "Count", "Min", "Max"] + numericTypes = ["Int64", "Double", "Decimal"] if any(value not in aggTypes for value in columns.values()): - print(f"Invalid aggregation type(s) have been specified in the 'columns' parameter. Valid aggregation types: {aggTypes}.") + print( + f"Invalid aggregation type(s) have been specified in the 'columns' parameter. Valid aggregation types: {aggTypes}." + ) return - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode. This function is only relevant for Direct Lake semantic models.") + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode. This function is only relevant for Direct Lake semantic models." + ) return - - dfC_filtT = dfC[dfC['Table Name'] == table_name] + + dfC_filtT = dfC[dfC["Table Name"] == table_name] if len(dfC_filtT) == 0: - print(f"The '{table_name}' table does not exist in the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The '{table_name}' table does not exist in the '{dataset}' semantic model within the '{workspace}' workspace." + ) return - - dfC_filt = dfC[(dfC['Table Name'] == table_name) & (dfC['Column Name'].isin(columnValues))] + + dfC_filt = dfC[ + (dfC["Table Name"] == table_name) & (dfC["Column Name"].isin(columnValues)) + ] if len(columns) != len(dfC_filt): - print(f"Columns listed in '{columnValues}' do not exist in the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"Columns listed in '{columnValues}' do not exist in the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace." + ) return - + # Check if doing sum/count/min/max etc. on a non-number column - for col,agg in columns.items(): - dfC_col = dfC_filt[dfC_filt['Column Name'] == col] - dataType = dfC_col['Data Type'].iloc[0] + for col, agg in columns.items(): + dfC_col = dfC_filt[dfC_filt["Column Name"] == col] + dataType = dfC_col["Data Type"].iloc[0] if agg in aggTypesAggregate and dataType not in numericTypes: - print(f"The '{col}' column in the '{table_name}' table is of '{dataType}' data type. 
Only columns of '{numericTypes}' data types can be aggregated as '{aggTypesAggregate}' aggregation types.") + print( + f"The '{col}' column in the '{table_name}' table is of '{dataType}' data type. Only columns of '{numericTypes}' data types can be aggregated as '{aggTypesAggregate}' aggregation types." + ) return # Create/update lakehouse delta agg table - aggSuffix = '_agg' + aggSuffix = "_agg" aggTableName = f"{table_name}{aggSuffix}" - aggLakeTName = aggTableName.lower().replace(' ','_') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Table Name'] == table_name] - lakeTName = dfP_filt['Query'].iloc[0] + aggLakeTName = aggTableName.lower().replace(" ", "_") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Table Name"] == table_name] + lakeTName = dfP_filt["Query"].iloc[0] - sqlEndpointId = get_direct_lake_sql_endpoint(dataset = dataset, workspace = workspace) + sqlEndpointId = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] if len(dfI_filt) == 0: - print(f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter.") + print( + f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter." + ) return - - lakehouseName = dfI_filt['Display Name'].iloc[0] - lakehouse_id = resolve_lakehouse_id(lakehouse = lakehouseName, workspace = lakehouse_workspace) + + lakehouseName = dfI_filt["Display Name"].iloc[0] + lakehouse_id = resolve_lakehouse_id( + lakehouse=lakehouseName, workspace=lakehouse_workspace + ) # Generate SQL query - query = 'SELECT' - groupBy = '\nGROUP BY' + query = "SELECT" + groupBy = "\nGROUP BY" for col, agg in columns.items(): - colFilt = dfC_filt[dfC_filt['Column Name'] == col] - sourceCol = colFilt['Source'].iloc[0] + colFilt = dfC_filt[dfC_filt["Column Name"] == col] + sourceCol = colFilt["Source"].iloc[0] - if agg == 'GroupBy': + if agg == "GroupBy": query = f"{query}\n{sourceCol}," groupBy = f"{groupBy}\n{sourceCol}," else: @@ -198,12 +254,16 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], query = query[:-1] spark = SparkSession.builder.getOrCreate() - fromTablePath = create_abfss_path(lakehouse_id=lakehouse_id, lakehouse_workspace_id=lakehouse_workspace_id, delta_table_name=lakeTName) + fromTablePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=lakeTName, + ) df = spark.read.format("delta").load(fromTablePath) - tempTableName = 'delta_table_' + lakeTName + tempTableName = "delta_table_" + lakeTName df.createOrReplaceTempView(tempTableName) sqlQuery = f"{query} \n FROM {tempTableName} {groupBy}" - + sqlQuery = sqlQuery[:-1] print(sqlQuery) @@ -211,20 +271,24 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], spark_df = spark.sql(sqlQuery) f"\nCreating/updating the '{aggLakeTName}' table in the lakehouse..." 
# Write spark dataframe to delta table - aggFilePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = lakehouse_workspace_id, delta_table_name = aggLakeTName) - spark_df.write.mode('overwrite').format('delta').save(aggFilePath) + aggFilePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=aggLakeTName, + ) + spark_df.write.mode("overwrite").format("delta").save(aggFilePath) f"The '{aggLakeTName}' table has been created/updated in the lakehouse." # Create/update semantic model agg table tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model f"\nUpdating the '{dataset}' semantic model..." - dfC_agg = dfC[dfC['Table Name'] == aggTableName] + dfC_agg = dfC[dfC["Table Name"] == aggTableName] if len(dfC_agg) == 0: print(f"Creating the '{aggTableName}' table...") - exp = m.Expressions['DatabaseQuery'] - tbl = TOM.Table() + exp = m.Expressions["DatabaseQuery"] + tbl = TOM.Table() tbl.Name = aggTableName tbl.IsHidden = True @@ -241,9 +305,9 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], tbl.Partitions.Add(part) for i, r in dfC_filt.iterrows(): - scName = r['Source'] - cName = r['Column Name'] - dType = r['Data Type'] + scName = r["Source"] + cName = r["Column Name"] + dType = r["Data Type"] col = TOM.DataColumn() col.Name = cName @@ -252,10 +316,14 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], col.DataType = System.Enum.Parse(TOM.DataType, dType) tbl.Columns.Add(col) - print(f"The '{aggTableName}'[{cName}] column has been added to the '{dataset}' semantic model.") + print( + f"The '{aggTableName}'[{cName}] column has been added to the '{dataset}' semantic model." + ) m.Tables.Add(tbl) - print(f"The '{aggTableName}' table has been added to the '{dataset}' semantic model.") + print( + f"The '{aggTableName}' table has been added to the '{dataset}' semantic model." 
+ ) else: print(f"Updating the '{aggTableName}' table's columns...") # Remove existing columns @@ -267,9 +335,9 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], m.Tables[tName].Columns.Remove(cName) # Add columns for i, r in dfC_filt.iterrows(): - scName = r['Source'] - cName = r['Column Name'] - dType = r['Data Type'] + scName = r["Source"] + cName = r["Column Name"] + dType = r["Data Type"] col = TOM.DataColumn() col.Name = cName @@ -281,58 +349,68 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], print(f"The '{aggTableName}'[{cName}] column has been added.") # Create relationships - relMap = { - 'm': 'Many', - '1': 'One', - '0': 'None' - } + relMap = {"m": "Many", "1": "One", "0": "None"} print(f"\nGenerating necessary relationships...") for i, r in dfR.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - cfb = r['Cross Filtering Behavior'] - sfb = r['Security Filtering Behavior'] - mult = r['Multiplicity'] - - crossFB = System.Enum.Parse(TOM.CrossFilteringBehavior,cfb) - secFB = System.Enum.Parse(TOM.SecurityFilteringBehavior,sfb) - fromCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, relMap.get(mult[0])) - toCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, relMap.get(mult[-1])) - + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + cfb = r["Cross Filtering Behavior"] + sfb = r["Security Filtering Behavior"] + mult = r["Multiplicity"] + + crossFB = System.Enum.Parse(TOM.CrossFilteringBehavior, cfb) + secFB = System.Enum.Parse(TOM.SecurityFilteringBehavior, sfb) + fromCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, relMap.get(mult[0]) + ) + toCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, relMap.get(mult[-1]) + ) + rel = TOM.SingleColumnRelationship() rel.FromCardinality = fromCardinality rel.ToCardinality = toCardinality - rel.IsActive = r['Active'] + rel.IsActive = r["Active"] rel.CrossFilteringBehavior = crossFB rel.SecurityFilteringBehavior = secFB - rel.RelyOnReferentialIntegrity = r['Rely On Referential Integrity'] + rel.RelyOnReferentialIntegrity = r["Rely On Referential Integrity"] if fromTable == table_name: try: rel.FromColumn = m.Tables[aggTableName].Columns[fromColumn] m.Relationships.Add(rel) - print(f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has been added.") + print( + f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has been added." + ) except: - print(f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has not been created.") - elif toTable == table_name: + print( + f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has not been created." + ) + elif toTable == table_name: try: rel.ToColumn = m.Tables[aggTableName].Columns[toColumn] m.Relationships.Add(rel) - print(f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has been added.") + print( + f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has been added." + ) except: - print(f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has not been created.") + print( + f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has not been created." + ) f"Relationship creation is complete." # Create IF measure f"\nCreating measure to check if the agg table can be used..." 
- aggChecker = 'IF(' - dfR_filt = dfR[(dfR['From Table'] == table_name) & (~dfR['From Column'].isin(columnValues))] + aggChecker = "IF(" + dfR_filt = dfR[ + (dfR["From Table"] == table_name) & (~dfR["From Column"].isin(columnValues)) + ] for i, r in dfR_filt.iterrows(): - toTable = r['To Table'] + toTable = r["To Table"] aggChecker = f"{aggChecker}\nISCROSSFILTERED('{toTable}') ||" aggChecker = aggChecker[:-3] @@ -342,7 +420,10 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], # Todo: add IFISFILTERED clause for columns f"\n Creating the base measures in the agg table..." # Create base agg measures - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = """ + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TABLE] AS [Table Name] ,[OBJECT] AS [Object Name] @@ -352,27 +433,32 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY WHERE [OBJECT_TYPE] = 'MEASURE' - """) - - baseMeasures = dep[(dep['Referenced Object Type'] == 'COLUMN') & (dep['Referenced Table'] == table_name) & (dep['Referenced Object'].isin(columnValues))] + """, + ) + + baseMeasures = dep[ + (dep["Referenced Object Type"] == "COLUMN") + & (dep["Referenced Table"] == table_name) + & (dep["Referenced Object"].isin(columnValues)) + ] for i, r in baseMeasures.iterrows(): - tName = r['Table Name'] - mName = r['Object Name'] - cName = r['Referenced Object'] - dfM_filt = dfM[dfM['Measure Name'] == mName] - expr = dfM_filt['Measure Expression'].iloc[0] + tName = r["Table Name"] + mName = r["Object Name"] + cName = r["Referenced Object"] + dfM_filt = dfM[dfM["Measure Name"] == mName] + expr = dfM_filt["Measure Expression"].iloc[0] colFQNonAgg = format_dax_object_name(tName, cName) colFQAgg = format_dax_object_name(aggTableName, cName) colNQNonAgg = f"{tName}[{cName}]" - if ' ' in tName: - newExpr = expr.replace(colFQNonAgg,colFQAgg) + if " " in tName: + newExpr = expr.replace(colFQNonAgg, colFQAgg) else: - newExpr = expr.replace(colFQNonAgg, colFQAgg).replace(colNQNonAgg,colFQAgg) + newExpr = expr.replace(colFQNonAgg, colFQAgg).replace(colNQNonAgg, colFQAgg) print(expr) print(newExpr) - + aggMName = mName + aggSuffix measure = TOM.Measure() measure.Name = aggMName @@ -380,39 +466,30 @@ def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], measure.Expression = newExpr m.Tables[aggTableName].Measures.Add(measure) f"The '{aggMName}' measure has been created in the '{aggTableName}' table." 
- - # Update base detail measures - - #m.SaveChanges() - - - - - - + # Update base detail measures + # m.SaveChanges() # Identify views used within Direct Lake model -#workspace = 'MK Demo 6' -#lakehouse = 'MyLakehouse' -#dataset = 'MigrationTest' -#lakehouse_workspace = workspace - -#dfView = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'View Name']) -#dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) -#isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - -#spark = SparkSession.builder.getOrCreate() -#views = spark.sql(f"SHOW VIEWS IN {lakehouse}").collect() -#for view in views: +# workspace = 'MK Demo 6' +# lakehouse = 'MyLakehouse' +# dataset = 'MigrationTest' +# lakehouse_workspace = workspace + +# dfView = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'View Name']) +# dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) +# isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) + +# spark = SparkSession.builder.getOrCreate() +# views = spark.sql(f"SHOW VIEWS IN {lakehouse}").collect() +# for view in views: # viewName = view['viewName'] # isTemporary = view['isTemporary'] # new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'View Name': viewName} # dfView = pd.concat([dfView, pd.DataFrame(new_data, index=[0])], ignore_index=True) -#dfView -#lakeT = get_lakehouse_tables(lakehouse, lakehouse_workspace) -#if not dfP['Query'].isin(lakeT['Table Name'].values): +# dfView +# lakeT = get_lakehouse_tables(lakehouse, lakehouse_workspace) +# if not dfP['Query'].isin(lakeT['Table Name'].values): # if - diff --git a/sempy_labs/Connections.py b/sempy_labs/Connections.py index fe97202f..de310ee6 100644 --- a/sempy_labs/Connections.py +++ b/sempy_labs/Connections.py @@ -3,11 +3,32 @@ import pandas as pd from typing import List, Optional, Union -def create_connection_cloud(name: str, server_name: str, database_name: str, user_name: str, password: str, privacy_level: str): - #https://review.learn.microsoft.com/en-us/rest/api/fabric/core/connections/create-connection?branch=features%2Fdmts&tabs=HTTP - - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 'Skip Test Connection']) +def create_connection_cloud( + name: str, + server_name: str, + database_name: str, + user_name: str, + password: str, + privacy_level: str, +): + + # https://review.learn.microsoft.com/en-us/rest/api/fabric/core/connections/create-connection?branch=features%2Fdmts&tabs=HTTP + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test Connection", + ] + ) client = fabric.FabricRestClient() @@ -17,15 +38,9 @@ def create_connection_cloud(name: str, server_name: str, database_name: str, use "connectionDetails": { "type": "SQL", "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], }, "privacyLevel": privacy_level, "credentialDetails": { @@ -33,33 +48,62 @@ def create_connection_cloud(name: str, server_name: str, database_name: str, use "connectionEncryption": "NotEncrypted", "skipTestConnection": False, 
"credentials": { - "credentialType": "Basic", - "username": user_name, - "password": password - } - } + "credentialType": "Basic", + "username": user_name, + "password": password, + }, + }, } - response = client.post(f"/v1/connections",json=request_body) + response = client.post(f"/v1/connections", json=request_body) if response.status_code == 200: o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) return df else: print(response.status_code) -def create_connection_on_prem(name: str, gateway_id: str, server_name: str, database_name: str, credentials: str, privacy_level: str): - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Gateway ID', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 'Skip Test Connection']) +def create_connection_on_prem( + name: str, + gateway_id: str, + server_name: str, + database_name: str, + credentials: str, + privacy_level: str, +): + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Gateway ID", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test Connection", + ] + ) client = fabric.FabricRestClient() @@ -70,15 +114,9 @@ def create_connection_on_prem(name: str, gateway_id: str, server_name: str, data "connectionDetails": { "type": "SQL", "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], }, "privacyLevel": privacy_level, "credentialDetails": { @@ -86,37 +124,63 @@ def create_connection_on_prem(name: str, gateway_id: str, server_name: str, data "connectionEncryption": "NotEncrypted", "skipTestConnection": False, "credentials": { - "credentialType": "Windows", - "values": [ - { - "gatewayId": gateway_id, - "credentials": credentials - } - ] - } - } + "credentialType": "Windows", + "values": [{"gatewayId": gateway_id, "credentials": credentials}], + }, + }, } - response = client.post(f"/v1/connections",json=request_body) + response = client.post(f"/v1/connections", json=request_body) if response.status_code == 
200: o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Gateway ID': o['gatewayId'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Gateway ID": o["gatewayId"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) return df else: print(response.status_code) -def create_connection_vnet(name: str, gateway_id: str, server_name: str, database_name: str, user_name: str, password: str, privacy_level: str): - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Gateway ID', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 'Skip Test Connection']) +def create_connection_vnet( + name: str, + gateway_id: str, + server_name: str, + database_name: str, + user_name: str, + password: str, + privacy_level: str, +): + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Gateway ID", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test Connection", + ] + ) client = fabric.FabricRestClient() @@ -127,15 +191,9 @@ def create_connection_vnet(name: str, gateway_id: str, server_name: str, databas "connectionDetails": { "type": "SQL", "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], }, "privacyLevel": privacy_level, "credentialDetails": { @@ -143,26 +201,34 @@ def create_connection_vnet(name: str, gateway_id: str, server_name: str, databas "connectionEncryption": "Encrypted", "skipTestConnection": False, "credentials": { - "credentialType": "Basic", - "username": user_name, - "password": password - } - } + "credentialType": "Basic", + "username": user_name, + "password": password, + }, + }, } - response = client.post(f"/v1/connections",json=request_body) + response = client.post(f"/v1/connections", json=request_body) if response.status_code == 200: o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Gateway ID': o['gatewayId'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential 
Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Gateway ID": o["gatewayId"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) return df else: - print(response.status_code) \ No newline at end of file + print(response.status_code) diff --git a/sempy_labs/CreatePQTFile.py b/sempy_labs/CreatePQTFile.py deleted file mode 100644 index f3303b2f..00000000 --- a/sempy_labs/CreatePQTFile.py +++ /dev/null @@ -1,191 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, os, shutil -import xml.etree.ElementTree as ET -from .ListFunctions import list_tables -from .Lakehouse import lakehouse_attached -from sempy._utils._log import log -from typing import List, Optional, Union - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def create_pqt_file(dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None): - - """ - Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - file_name : str, default=None - The name of the Power Query Template file to be generated. - Defaults to None which resolves to 'PowerQueryTemplate'. - - Returns - ------- - - """ - - if file_name is None: - file_name = 'PowerQueryTemplate' - - lakeAttach = lakehouse_attached() - - if lakeAttach == False: - print(f"{red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - folderPath = '/lakehouse/default/Files' - subFolderPath = os.path.join(folderPath, 'pqtnewfolder') - os.makedirs(subFolderPath, exist_ok=True) - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfT = list_tables(dataset, workspace) - dfE = fabric.list_expressions(dataset = dataset, workspace = workspace) - - # Check if M-partitions are used - if any(dfP['Source Type'] == 'M'): - class QueryMetadata: - def __init__(self, QueryName, QueryGroupId=None, LastKnownIsParameter=None, LastKnownResultTypeName=None, LoadEnabled=True, IsHidden=False): - self.QueryName = QueryName - self.QueryGroupId = QueryGroupId - self.LastKnownIsParameter = LastKnownIsParameter - self.LastKnownResultTypeName = LastKnownResultTypeName - self.LoadEnabled = LoadEnabled - self.IsHidden = IsHidden - - class RootObject: - def __init__(self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None): - if QueryGroups is None: - QueryGroups = [] - self.DocumentLocale = DocumentLocale - self.EngineVersion = EngineVersion - self.QueriesMetadata = QueriesMetadata - self.QueryGroups = QueryGroups - - # STEP 1: Create MashupDocument.pq - mdfileName = 'MashupDocument.pq' - mdFilePath = os.path.join(subFolderPath, mdfileName) - sb = 'section Section1;' - for table_name in dfP['Table Name'].unique(): - tName = '#\"' + table_name + '"' - sourceExpression = dfT.loc[(dfT['Name'] == table_name), 'Source Expression'].iloc[0] - refreshPolicy = dfT.loc[(dfT['Name'] == table_name), 'Refresh Policy'].iloc[0] - sourceType = dfP.loc[(dfP['Table Name'] == table_name), 'Source Type'].iloc[0] - - if sourceType == 'M' or refreshPolicy: - sb = sb + '\n' + 'shared ' + tName + ' = ' - - partitions_in_table = dfP.loc[dfP['Table Name'] == table_name, 'Partition Name'].unique() - - i=1 - for partition_name in partitions_in_table: - pSourceType = dfP.loc[(dfP['Table Name'] == table_name) & (dfP['Partition Name'] == partition_name), 'Source Type'].iloc[0] - pQuery = dfP.loc[(dfP['Table Name'] == table_name) & (dfP['Partition Name'] == partition_name), 'Query'].iloc[0] - - if pQuery is not None: - pQueryNoSpaces = pQuery.replace(' ','').replace('\n','').replace('\t','').replace('\r','') - if pQueryNoSpaces.startswith('letSource=""'): - pQuery = 'let\n\tSource = ""\nin\n\tSource' - - if pSourceType == 'M' and i==1: - sb = sb + pQuery + ';' - elif refreshPolicy and i==1: - sb = sb + sourceExpression + ';' - i+=1 - - for index, row in dfE.iterrows(): - expr = row['Expression'] - eName = row['Name'] - eName = '#"' + eName + '"' - sb = sb + '\n' + "shared " + eName + " = " + expr + ";" - - with open(mdFilePath, 'w') as file: - file.write(sb) - - # STEP 2: Create the MashupMetadata.json file - mmfileName = 'MashupMetadata.json' - mmFilePath = os.path.join(subFolderPath, mmfileName) - queryMetadata = [] - - for tName in dfP['Table Name'].unique(): - sourceType = dfP.loc[(dfP['Table Name'] == tName), 'Source Type'].iloc[0] - refreshPolicy = dfT.loc[(dfT['Name'] == tName), 'Refresh Policy'].iloc[0] - if sourceType == 'M' or refreshPolicy: - queryMetadata.append(QueryMetadata(tName, None, None, None, True, False)) - - for i, r in dfE.iterrows(): - eName = r['Name'] - eKind = r['Kind'] - if eKind == 'M': - queryMetadata.append(QueryMetadata(eName, None, None, None, True, False)) - else: - queryMetadata.append(QueryMetadata(eName, None, None, None, 
False, False)) - - rootObject = RootObject("en-US", "2.126.453.0", queryMetadata) - - def obj_to_dict(obj): - if isinstance(obj, list): - return [obj_to_dict(e) for e in obj] - elif hasattr(obj, "__dict__"): - return {k: obj_to_dict(v) for k, v in obj.__dict__.items()} - else: - return obj - jsonContent = json.dumps(obj_to_dict(rootObject), indent=4) - - with open(mmFilePath, 'w') as json_file: - json_file.write(jsonContent) - - # STEP 3: Create Metadata.json file - mFileName = 'Metadata.json' - mFilePath = os.path.join(subFolderPath, mFileName) - metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"} - jsonContent = json.dumps(metaData, indent=4) - - with open(mFilePath, 'w') as json_file: - json_file.write(jsonContent) - - # STEP 4: Create [Content_Types].xml file: - ns = 'http://schemas.openxmlformats.org/package/2006/content-types' - ET.register_namespace('', ns) - types = ET.Element("{%s}Types" % ns) - default1 = ET.SubElement(types, "{%s}Default" % ns, {"Extension": "json", "ContentType": "application/json"}) - default2 = ET.SubElement(types, "{%s}Default" % ns, {"Extension": "pq", "ContentType": "application/x-ms-m"}) - xmlDocument = ET.ElementTree(types) - xmlFileName = '[Content_Types].xml' - xmlFilePath = os.path.join(subFolderPath, xmlFileName) - xmlDocument.write(xmlFilePath, xml_declaration=True, encoding='utf-8', method='xml') - - # STEP 5: Zip up the 4 files - zipFileName = file_name + '.zip' - zipFilePath = os.path.join(folderPath, zipFileName) - shutil.make_archive(zipFilePath[:-4], 'zip', subFolderPath) - - # STEP 6: Convert the zip file back into a .pqt file - newExt = '.pqt' - directory = os.path.dirname(zipFilePath) - fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0] - newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt) - shutil.move(zipFilePath, newFilePath) - - #STEP 7: Delete subFolder directory which is no longer needed - shutil.rmtree(subFolderPath, ignore_errors=True) - - print(f"{green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse.") - - else: - print(f"{yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated.") \ No newline at end of file diff --git a/sempy_labs/DirectLakeSchemaCompare.py b/sempy_labs/DirectLakeSchemaCompare.py deleted file mode 100644 index 66e1fd0e..00000000 --- a/sempy_labs/DirectLakeSchemaCompare.py +++ /dev/null @@ -1,87 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint -from .GetLakehouseColumns import get_lakehouse_columns -from .ListFunctions import list_tables -from typing import List, Optional, Union - -def direct_lake_schema_compare(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if lakehouse_workspace is None: - lakehouse_workspace = workspace - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] - - if len(dfI_filt) == 0: - print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.") - return - - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"The '{dataset}' semantic model is not in Direct Lake mode.") - return - - dfT = list_tables(dataset, workspace) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - lc = get_lakehouse_columns(lakehouse, lakehouse_workspace) - - dfT.rename(columns={'Type': 'Table Type'}, inplace=True) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] - dfC = pd.merge(dfC,dfP[['Table Name', 'Query']], on='Table Name', how='inner') - dfC = pd.merge(dfC,dfT[['Name', 'Table Type']], left_on='Table Name', right_on='Name', how='inner') - dfC['Full Column Name'] = format_dax_object_name(dfC['Query'], dfC['Source']) - dfC_filt = dfC[dfC['Table Type'] == 'Table'] - # Schema compare - missingtbls = dfP_filt[~dfP_filt['Query'].isin(lc['Table Name'])] - missingtbls = missingtbls[['Table Name', 'Query']] - missingtbls.rename(columns={'Query': 'Source Table'}, inplace=True) - missingcols = dfC_filt[~dfC_filt['Full Column Name'].isin(lc['Full Column Name'])] - missingcols = missingcols[['Table Name', 'Column Name', 'Type', 'Data Type', 'Source']] - missingcols.rename(columns={'Source': 'Source Column'}, inplace=True) - - if len(missingtbls) == 0: - print(f"All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - else: - print(f"The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - display(missingtbls) - if len(missingcols) == 0: - print(f"All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - else: - print(f"The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - display(missingcols) - \ No newline at end of file diff --git a/sempy_labs/GenerateReport.py b/sempy_labs/GenerateReport.py deleted file mode 100644 index d6de2531..00000000 --- a/sempy_labs/GenerateReport.py +++ /dev/null @@ -1,255 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, base64, time -from typing import List, 
Optional, Union - -def create_report_from_reportjson(report: str, dataset: str, report_json: str, theme_json: Optional[str] = None, workspace: Optional[str] = None): - - """ - Creates a report based on a report.json file (and an optional themes.json file). - - Parameters - ---------- - report : str - Name of the report. - dataset : str - Name of the semantic model to connect to the report. - report_json : str - The report.json file to be used to create the report. - theme_json : str, default=None - The theme.json file to be used for the theme of the report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objectType = 'Report' - - dfI_m = fabric.list_items(workspace = workspace, type = 'SemanticModel') - dfI_model = dfI_m[(dfI_m['Display Name'] == dataset)] - - if len(dfI_model) == 0: - print(f"ERROR: The '{dataset}' semantic model does not exist in the '{workspace}' workspace.") - return - - datasetId = dfI_model['Id'].iloc[0] - - dfI_r = fabric.list_items(workspace = workspace, type = 'Report') - dfI_rpt = dfI_r[(dfI_r['Display Name'] == report)] - - if len(dfI_rpt) > 0: - print(f"WARNING: '{report}' already exists as a report in the '{workspace}' workspace.") - return - - client = fabric.FabricRestClient() - defPBIR = { - "version": "1.0", - "datasetReference": { - "byPath": None, - "byConnection": { - "connectionString": None, - "pbiServiceModelId": None, - "pbiModelVirtualServerName": "sobe_wowvirtualserver", - "pbiModelDatabaseName": datasetId, - "name": "EntityDataSource", - "connectionType": "pbiServiceXmlaStyleLive" - } - } -} - - def conv_b64(file): - - loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - - return f - - definitionPBIR = conv_b64(defPBIR) - payloadReportJson = conv_b64(report_json) - - if theme_json == None: - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": definitionPBIR, - "payloadType": "InlineBase64" - } - ] - - } - } - else: - payloadThemeJson = conv_b64(theme_json) - themeID = theme_json['payload']['blob']['displayName'] - themePath = 'StaticResources/SharedResources/BaseThemes/' + themeID + '.json' - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": themePath, - "payload": payloadThemeJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": definitionPBIR, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/items",json=request_body) - - if response.status_code == 201: - print('Report creation succeeded') - print(response.json()) - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = 
client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - print('Report creation succeeded') - print(response.json()) - -def update_report_from_reportjson(report: str, report_json: str, workspace: Optional[str] = None): - - """ - Updates a report based on a report.json file. - - Parameters - ---------- - report : str - Name of the report. - report_json : str - The report.json file to be used to update the report. - workspace : str, default=None - The Fabric workspace name in which the report resides. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objectType = 'Report' - - dfR = fabric.list_reports(workspace = workspace) - dfR_filt = dfR[(dfR['Name'] == report) & (dfR['Report Type'] == 'PowerBIReport')] - - if len(dfR_filt) == 0: - print(f"The '{report}' report does not exist in the '{workspace}' workspace.") - return - - reportId = dfR_filt['Id'].iloc[0] - client = fabric.FabricRestClient() - - response = client.post(f"/v1/workspaces/{workspace_id}/items/{reportId}/getDefinition") - df_items = pd.json_normalize(response.json()['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'definition.pbir'] - rptDefFile = df_items_filt['payload'].iloc[0] - #datasetId = dfR_filt['Dataset Id'].iloc[0] - #datasetWorkspaceId = dfR_filt['Dataset Workspace Id'].iloc[0] - - - #defPBIR = { - #"version": "1.0", - #"datasetReference": { - # "byPath": None, - # "byConnection": { - # "connectionString": None, - # "pbiServiceModelId": None, - # "pbiModelVirtualServerName": "sobe_wowvirtualserver", - # "pbiModelDatabaseName": datasetId, - # "name": "EntityDataSource", - # "connectionType": "pbiServiceXmlaStyleLive" - # } - #} -#} - - def conv_b64(file): - - loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - - return f - - #definitionPBIR = conv_b64(defPBIR) - payloadReportJson = conv_b64(report_json) - - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": rptDefFile, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/reports/{reportId}/updateDefinition",json=request_body) - - if response.status_code == 201: - print(f"The '{report}' report has been successfully updated.") - #print(response.json()) - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{report}' report has been successfully updated.") - #print(response.json()) \ No newline at end of file diff --git a/sempy_labs/GetLakehouseTables.py b/sempy_labs/GetLakehouseTables.py deleted file mode 100644 index f14f60fa..00000000 --- 
a/sempy_labs/GetLakehouseTables.py +++ /dev/null @@ -1,159 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from pyspark.sql import SparkSession -import pyarrow.parquet as pq -import datetime -from .HelperFunctions import resolve_lakehouse_id, resolve_lakehouse_name -from .Guardrails import get_sku_size, get_directlake_guardrails_for_sku -from .Lakehouse import lakehouse_attached -from typing import List, Optional, Union - -def get_lakehouse_tables(lakehouse: Optional[str] = None, workspace: Optional[str] = None, extended: Optional[bool] = False, count_rows: Optional[bool] = False, export: Optional[bool] = False): - - """ - Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. - - Parameters - ---------- - lakehouse : str, default=None - The Fabric lakehouse. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - extended : bool, default=False - Obtains additional columns relevant to the size of each table. - count_rows : bool, default=False - Obtains a row count for each lakehouse table. - export : bool, default=False - Exports the resulting dataframe to a delta table in the lakehouse. - - Returns - ------- - pandas.DataFrame - Shows the tables/columns within a lakehouse and their properties. - """ - - df = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'Table Name', 'Format', 'Type', 'Location']) - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - - if count_rows: #Setting countrows defaults to extended=True - extended=True - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables") - - for i in response.json()['data']: - tName = i['name'] - tType = i['type'] - tFormat = i['format'] - tLocation = i['location'] - if extended == False: - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - sku_value = get_sku_size(workspace) - guardrail = get_directlake_guardrails_for_sku(sku_value) - - spark = SparkSession.builder.getOrCreate() - - intColumns = ['Files', 'Row Groups', 'Table Size'] - if tType == 'Managed' and tFormat == 'delta': - detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0] - num_files = detail_df.numFiles - size_in_bytes = detail_df.sizeInBytes - - delta_table_path = f"Tables/{tName}" - latest_files = spark.read.format('delta').load(delta_table_path).inputFiles() - file_paths = [f.split("/")[-1] for f in latest_files] - - # Handle FileNotFoundError - num_rowgroups = 0 - for filename in file_paths: - try: - num_rowgroups += pq.ParquetFile(f"/lakehouse/default/{delta_table_path}/{filename}").num_row_groups - except FileNotFoundError: - continue - - if count_rows: - num_rows = spark.table(tName).count() - 
intColumns.append('Row Count') - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation, 'Files': num_files, 'Row Groups': num_rowgroups, 'Row Count': num_rows, 'Table Size': size_in_bytes } - else: - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation, 'Files': num_files, 'Row Groups': num_rowgroups, 'Table Size': size_in_bytes } - - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df[intColumns] = df[intColumns].astype(int) - - df['SKU'] = guardrail['Fabric SKUs'].iloc[0] - df['Parquet File Guardrail'] = guardrail['Parquet files per table'].iloc[0] - df['Row Group Guardrail'] = guardrail['Row groups per table'].iloc[0] - df['Row Count Guardrail'] = guardrail['Rows per table (millions)'].iloc[0] * 1000000 - - df['Parquet File Guardrail Hit'] = df['Files'] > df['Parquet File Guardrail'] - df['Row Group Guardrail Hit'] = df['Row Groups'] > df['Row Group Guardrail'] - - if count_rows: - df['Row Count Guardrail Hit'] = df['Row Count'] > df['Row Count Guardrail'] - - if export: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") - return - spark = SparkSession.builder.getOrCreate() - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) - lakeTName = 'lakehouse_table_details' - lakeT_filt = df[df['Table Name'] == lakeTName] - - query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" - - if len(lakeT_filt) == 0: - runId = 1 - else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 - - export_df = df.copy() - - cols = ['Files', 'Row Groups', 'Row Count', 'Table Size', 'SKU', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail', 'Parquet File Guardrail Hit', 'Row Group Guardrail Hit', 'Row Count Guardrail Hit'] - - for c in cols: - if c not in export_df: - if c in ['Files', 'Row Groups', 'Row Count', 'Table Size', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail']: - export_df[c] = 0 - export_df[c] = export_df[c].astype(int) - elif c in ['SKU']: - export_df[c] = None - export_df[c] = export_df[c].astype(str) - elif c in ['Parquet File Guardrail Hit', 'Row Group Guardrail Hit', 'Row Count Guardrail Hit']: - export_df[c] = False - export_df[c] = export_df[c].astype(bool) - - print(f"Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n") - now = datetime.datetime.now() - export_df['Timestamp'] = now - export_df['RunId'] = runId - - export_df.columns = export_df.columns.str.replace(' ', '_') - spark_df = spark.createDataFrame(export_df) - spark_df.write.mode('append').format('delta').saveAsTable(lakeTName) - print(f"\u2022 Lakehouse table properties have been saved to the '{lakeTName}' delta table.") - - return df \ No newline at end of file diff --git a/sempy_labs/GetMeasureDependencies.py b/sempy_labs/GetMeasureDependencies.py index 6a1ba50d..ecb2a28a 100644 --- a/sempy_labs/GetMeasureDependencies.py +++ b/sempy_labs/GetMeasureDependencies.py @@ -1,11 +1,11 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import format_dax_object_name +from ._helper_functions import format_dax_object_name from typing import List, 
Optional, Union -def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): +def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): """ Shows all dependencies for all measures in a semantic model. @@ -28,8 +28,10 @@ def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = - """ + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TABLE] AS [Table Name] ,[OBJECT] AS [Object Name] @@ -39,44 +41,96 @@ def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY WHERE [OBJECT_TYPE] = 'MEASURE' - """) + """, + ) - dep['Object Type'] = dep['Object Type'].str.capitalize() - dep['Referenced Object Type'] = dep['Referenced Object Type'].str.capitalize() + dep["Object Type"] = dep["Object Type"].str.capitalize() + dep["Referenced Object Type"] = dep["Referenced Object Type"].str.capitalize() - dep['Full Object Name'] = format_dax_object_name(dep['Table Name'], dep['Object Name']) - dep['Referenced Full Object Name'] = format_dax_object_name(dep['Referenced Table'], dep['Referenced Object']) - dep['Parent Node'] = dep['Object Name'] + dep["Full Object Name"] = format_dax_object_name( + dep["Table Name"], dep["Object Name"] + ) + dep["Referenced Full Object Name"] = format_dax_object_name( + dep["Referenced Table"], dep["Referenced Object"] + ) + dep["Parent Node"] = dep["Object Name"] df = dep - df['Done'] = df.apply(lambda row: False if row['Referenced Object Type'] == 'Measure' else True, axis=1) + df["Done"] = df.apply( + lambda row: False if row["Referenced Object Type"] == "Measure" else True, + axis=1, + ) - while(any(df['Done'] == False)): + while any(df["Done"] == False): for i, r in df.iterrows(): - rObjFull = r['Referenced Full Object Name'] - rObj = r['Referenced Object'] - if r['Done'] == False: - dep_filt = dep[dep['Full Object Name'] == rObjFull] + rObjFull = r["Referenced Full Object Name"] + rObj = r["Referenced Object"] + if r["Done"] == False: + dep_filt = dep[dep["Full Object Name"] == rObjFull] for index, dependency in dep_filt.iterrows(): d = True - if dependency[5] == 'Measure': + if dependency[5] == "Measure": d = False - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[4], 'Referenced Table': dependency[3], 'Referenced Object Type': dependency[5], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[4], + "Referenced Table": dependency[3], + "Referenced Object Type": dependency[5], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) else: - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[5], 'Referenced Table': dependency[4], 'Referenced Object Type': 
dependency[6], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[5], + "Referenced Table": dependency[4], + "Referenced Object Type": dependency[6], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) - df.loc[i, 'Done'] = True + df.loc[i, "Done"] = True - df = df.drop(['Done','Full Object Name','Referenced Full Object Name'], axis=1) + df = df.drop(["Done", "Full Object Name", "Referenced Full Object Name"], axis=1) return df -def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): +def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): """ Shows all dependencies for all objects in a semantic model. @@ -99,8 +153,10 @@ def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = - """ + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TABLE] AS [Table Name] ,[OBJECT] AS [Object Name] @@ -110,40 +166,93 @@ def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): ,[REFERENCED_OBJECT] AS [Referenced Object] ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY - """) + """, + ) - dep['Object Type'] = dep['Object Type'].str.replace('_',' ').str.title() - dep['Referenced Object Type'] = dep['Referenced Object Type'].str.replace('_',' ').str.title() + dep["Object Type"] = dep["Object Type"].str.replace("_", " ").str.title() + dep["Referenced Object Type"] = ( + dep["Referenced Object Type"].str.replace("_", " ").str.title() + ) - dep['Full Object Name'] = format_dax_object_name(dep['Table Name'], dep['Object Name']) - dep['Referenced Full Object Name'] = format_dax_object_name(dep['Referenced Table'], dep['Referenced Object']) - dep['Parent Node'] = dep['Object Name'] + dep["Full Object Name"] = format_dax_object_name( + dep["Table Name"], dep["Object Name"] + ) + dep["Referenced Full Object Name"] = format_dax_object_name( + dep["Referenced Table"], dep["Referenced Object"] + ) + dep["Parent Node"] = dep["Object Name"] df = dep - objs = ['Measure','Calc Column', 'Calculation Item', 'Calc Table'] + objs = ["Measure", "Calc Column", "Calculation Item", "Calc Table"] - df['Done'] = df.apply(lambda row: False if row['Referenced Object Type'] in objs else True, axis=1) + df["Done"] = df.apply( + lambda row: False if row["Referenced Object Type"] in objs else True, axis=1 + ) - while(any(df['Done'] == False)): + while any(df["Done"] == False): for i, r in df.iterrows(): - rObjFull = r['Referenced Full Object Name'] - rObj = r['Referenced Object'] - if r['Done'] == False: - dep_filt = dep[dep['Full Object Name'] == rObjFull] + rObjFull = r["Referenced Full Object Name"] + rObj = r["Referenced Object"] + if r["Done"] == False: + dep_filt = dep[dep["Full Object Name"] == rObjFull] for index, dependency in dep_filt.iterrows(): - d = True + d = True if dependency[5] in objs: d = False - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 
'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[4], 'Referenced Table': dependency[3], 'Referenced Object Type': dependency[5], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[4], + "Referenced Table": dependency[3], + "Referenced Object Type": dependency[5], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) else: - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[5], 'Referenced Table': dependency[4], 'Referenced Object Type': dependency[6], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[5], + "Referenced Table": dependency[4], + "Referenced Object Type": dependency[6], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) - df.loc[i, 'Done'] = True + df.loc[i, "Done"] = True - df = df.drop(['Done'], axis=1) + df = df.drop(["Done"], axis=1) - return df \ No newline at end of file + return df diff --git a/sempy_labs/GetSemanticModelBim.py b/sempy_labs/GetSemanticModelBim.py index 0959dadb..f60526e5 100644 --- a/sempy_labs/GetSemanticModelBim.py +++ b/sempy_labs/GetSemanticModelBim.py @@ -1,13 +1,16 @@ -import sempy import sempy.fabric as fabric import pandas as pd import json, os, time, base64 -from .HelperFunctions import resolve_lakehouse_name -from .Lakehouse import lakehouse_attached +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs.lakehouse import lakehouse_attached from typing import List, Optional, Union -def get_semantic_model_bim(dataset: str, workspace: Optional[str] = None, save_to_file_name: Optional[str] = None): +def get_semantic_model_bim( + dataset: str, + workspace: Optional[str] = None, + save_to_file_name: Optional[str] = None, +): """ Extracts the Model.bim file for a given semantic model. 
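The hunk below shows how get_semantic_model_bim handles the asynchronous getDefinition call. Condensed into a standalone sketch (the function and variable names here are illustrative, not part of the library, and the real function's error handling is omitted), the pattern is: POST getDefinition, poll the operations endpoint until it reports Succeeded, fetch the result, then base64-decode the model.bim part:

import base64
import json
import time

import pandas as pd
import sempy.fabric as fabric


def fetch_model_bim(workspace_id: str, item_id: str) -> dict:
    # POST the definition request; a 202 means the service started a long-running operation.
    client = fabric.FabricRestClient()
    response = client.post(f"/v1/workspaces/{workspace_id}/items/{item_id}/getDefinition")
    if response.status_code == 202:
        operation_id = response.headers["x-ms-operation-id"]
        # Poll the operation every few seconds until the service reports success.
        while json.loads(client.get(f"/v1/operations/{operation_id}").content)["status"] != "Succeeded":
            time.sleep(3)
        response = client.get(f"/v1/operations/{operation_id}/result")
    # The definition arrives as base64-encoded parts; pick out model.bim and decode it.
    parts = pd.json_normalize(response.json()["definition"]["parts"])
    payload = parts.loc[parts["path"] == "model.bim", "payload"].iloc[0]
    return json.loads(base64.b64decode(payload).decode("utf-8"))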
@@ -33,47 +36,53 @@ def get_semantic_model_bim(dataset: str, workspace: Optional[str] = None, save_t workspace = fabric.resolve_workspace_name(workspace_id) else: workspace_id = fabric.resolve_workspace_id(workspace) - - objType = 'SemanticModel' + + objType = "SemanticModel" client = fabric.FabricRestClient() - itemList = fabric.list_items(workspace = workspace, type = objType) - itemListFilt = itemList[(itemList['Display Name'] == dataset)] - itemId = itemListFilt['Id'].iloc[0] - response = client.post(f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition") - + itemList = fabric.list_items(workspace=workspace, type=objType) + itemListFilt = itemList[(itemList["Display Name"] == dataset)] + itemId = itemListFilt["Id"].iloc[0] + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" + ) + if response.status_code == 200: res = response.json() elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] + operationId = response.headers["x-ms-operation-id"] response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": time.sleep(3) response = client.get(f"/v1/operations/{operationId}") response_body = json.loads(response.content) response = client.get(f"/v1/operations/{operationId}/result") res = response.json() - df_items = pd.json_normalize(res['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'model.bim'] - payload = df_items_filt['payload'].iloc[0] - bimFile = base64.b64decode(payload).decode('utf-8') + df_items = pd.json_normalize(res["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "model.bim"] + payload = df_items_filt["payload"].iloc[0] + bimFile = base64.b64decode(payload).decode("utf-8") bimJson = json.loads(bimFile) - if save_to_file_name is not None: + if save_to_file_name is not None: lakeAttach = lakehouse_attached() if lakeAttach == False: - print(f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") + print( + f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
+ ) return - + lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - folderPath = '/lakehouse/default/Files' - fileExt = '.bim' + folderPath = "/lakehouse/default/Files" + fileExt = ".bim" if not save_to_file_name.endswith(fileExt): save_to_file_name = save_to_file_name + fileExt filePath = os.path.join(folderPath, save_to_file_name) with open(filePath, "w") as json_file: json.dump(bimJson, json_file, indent=4) - print(f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n") + print( + f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + ) - return bimJson \ No newline at end of file + return bimJson diff --git a/sempy_labs/Guardrails.py b/sempy_labs/Guardrails.py index 3826cb45..1849289b 100644 --- a/sempy_labs/Guardrails.py +++ b/sempy_labs/Guardrails.py @@ -3,8 +3,8 @@ import pandas as pd from typing import List, Optional, Union -def get_direct_lake_guardrails(): +def get_direct_lake_guardrails(): """ Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query based on Microsoft's online documentation. @@ -17,17 +17,17 @@ def get_direct_lake_guardrails(): A table showing the Direct Lake guardrails by SKU. """ - url = 'https://learn.microsoft.com/power-bi/enterprise/directlake-overview' + url = "https://learn.microsoft.com/power-bi/enterprise/directlake-overview" tables = pd.read_html(url) df = tables[0] - df['Fabric SKUs'] = df['Fabric SKUs'].str.split('/') - df = df.explode('Fabric SKUs', ignore_index=True) - + df["Fabric SKUs"] = df["Fabric SKUs"].str.split("/") + df = df.explode("Fabric SKUs", ignore_index=True) + return df -def get_sku_size(workspace: Optional[str] = None): +def get_sku_size(workspace: Optional[str] = None): """ Shows the SKU size for a workspace. @@ -49,15 +49,20 @@ def get_sku_size(workspace: Optional[str] = None): workspace = fabric.resolve_workspace_name(workspace_id) dfC = fabric.list_capacities() - dfW = fabric.list_workspaces().sort_values(by='Name', ascending=True) - dfC.rename(columns={'Id': 'Capacity Id'}, inplace=True) - dfCW = pd.merge(dfW, dfC[['Capacity Id', 'Sku', 'Region', 'State']], on='Capacity Id', how='inner') - sku_value = dfCW.loc[dfCW['Name'] == workspace, 'Sku'].iloc[0] - + dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True) + dfC.rename(columns={"Id": "Capacity Id"}, inplace=True) + dfCW = pd.merge( + dfW, + dfC[["Capacity Id", "Sku", "Region", "State"]], + on="Capacity Id", + how="inner", + ) + sku_value = dfCW.loc[dfCW["Name"] == workspace, "Sku"].iloc[0] + return sku_value -def get_directlake_guardrails_for_sku(sku_size: str): +def get_directlake_guardrails_for_sku(sku_size: str): """ Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity. 
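These guardrail helpers are meant to be chained with get_sku_size; a minimal usage sketch for a Fabric notebook (the workspace name is illustrative, and the import path reflects the module layout at this point in the patch series):

from sempy_labs.Guardrails import get_directlake_guardrails_for_sku, get_sku_size

# Resolve the SKU backing the workspace's capacity, then look up its Direct Lake guardrails.
sku_size = get_sku_size(workspace="Sales Analytics")      # illustrative workspace name
guardrails = get_directlake_guardrails_for_sku(sku_size)  # one-row slice of the guardrail table
print(guardrails)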
*Use the result of the 'get_sku_size' function as an input for this function's skuSize parameter.* @@ -65,7 +70,7 @@ def get_directlake_guardrails_for_sku(sku_size: str): Parameters ---------- sku_size : str - Sku size of a workspace/capacity + Sku size of a workspace/capacity Returns ------- @@ -74,6 +79,6 @@ def get_directlake_guardrails_for_sku(sku_size: str): """ df = get_direct_lake_guardrails() - filtered_df = df[df['Fabric SKUs'] == sku_size] - - return filtered_df \ No newline at end of file + filtered_df = df[df["Fabric SKUs"] == sku_size] + + return filtered_df diff --git a/sempy_labs/LogAnalytics.py b/sempy_labs/LogAnalytics.py index 8b4cacad..14d7197d 100644 --- a/sempy_labs/LogAnalytics.py +++ b/sempy_labs/LogAnalytics.py @@ -1,13 +1,18 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import resolve_dataset_id +from ._helper_functions import resolve_dataset_id from typing import List, Optional, Union from sempy._utils._log import log -@log -def run_dax(dataset: str, dax_query: str, user_name: Optional[str] = None, workspace: Optional[str] = None): +@log +def run_dax( + dataset: str, + dax_query: str, + user_name: Optional[str] = None, + workspace: Optional[str] = None, +): """ Runs a DAX query against a semantic model. @@ -30,7 +35,7 @@ def run_dax(dataset: str, dax_query: str, user_name: Optional[str] = None, works A pandas dataframe holding the result of the DAX query. """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries-in-group + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries-in-group if workspace is None: workspace_id = fabric.get_workspace_id() @@ -38,31 +43,24 @@ def run_dax(dataset: str, dax_query: str, user_name: Optional[str] = None, works else: workspace_id = fabric.resolve_workspace_id(workspace) - dataset_id = resolve_dataset_id(dataset = dataset, workspace = workspace) + dataset_id = resolve_dataset_id(dataset=dataset, workspace=workspace) if user_name is None: - request_body = { - "queries": [ - { - "query": dax_query - } - ] - } + request_body = {"queries": [{"query": dax_query}]} else: request_body = { - "queries": [ - { - "query": dax_query + "queries": [{"query": dax_query}], + "impersonatedUserName": user_name, } - ], - "impersonatedUserName": user_name - } client = fabric.PowerBIRestClient() - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries", json = request_body) - data = response.json()['results'][0]['tables'] - column_names = data[0]['rows'][0].keys() - data_rows = [row.values() for item in data for row in item['rows']] + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries", + json=request_body, + ) + data = response.json()["results"][0]["tables"] + column_names = data[0]["rows"][0].keys() + data_rows = [row.values() for item in data for row in item["rows"]] df = pd.DataFrame(data_rows, columns=column_names) - - return df \ No newline at end of file + + return df diff --git a/sempy_labs/MeasureDependencyTree.py b/sempy_labs/MeasureDependencyTree.py index 17a3b649..32000041 100644 --- a/sempy_labs/MeasureDependencyTree.py +++ b/sempy_labs/MeasureDependencyTree.py @@ -5,9 +5,11 @@ from typing import List, Optional, Union from sempy._utils._log import log -@log -def measure_dependency_tree(dataset: str, measure_name: str, workspace: Optional[str] = None): +@log +def measure_dependency_tree( + dataset: str, measure_name: str, workspace: Optional[str] = None +): 
""" Prints a measure dependency tree of all dependent objects for a measure in a semantic model. @@ -27,20 +29,21 @@ def measure_dependency_tree(dataset: str, measure_name: str, workspace: Optional """ - if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfM_filt = dfM[dfM['Measure Name'] == measure_name] + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfM_filt = dfM[dfM["Measure Name"] == measure_name] if len(dfM_filt) == 0: - print(f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace.") + print( + f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace." + ) return md = get_measure_dependencies(dataset, workspace) - df_filt = md[md['Object Name'] == measure_name] + df_filt = md[md["Object Name"] == measure_name] # Create a dictionary to hold references to nodes node_dict = {} @@ -50,27 +53,27 @@ def measure_dependency_tree(dataset: str, measure_name: str, workspace: Optional # Populate the tree for _, row in df_filt.iterrows(): - #measure_name = row['Object Name'] - ref_obj_table_name = row['Referenced Table'] - ref_obj_name = row['Referenced Object'] - ref_obj_type = row['Referenced Object Type'] - parent_node_name = row['Parent Node'] - + # measure_name = row['Object Name'] + ref_obj_table_name = row["Referenced Table"] + ref_obj_name = row["Referenced Object"] + ref_obj_type = row["Referenced Object Type"] + parent_node_name = row["Parent Node"] + # Create or get the parent node parent_node = node_dict.get(parent_node_name) if parent_node is None: - parent_node = Node(parent_node_name) + parent_node = Node(parent_node_name) node_dict[parent_node_name] = parent_node parent_node.custom_property = measureIcon + " " # Create the child node child_node_name = ref_obj_name child_node = Node(child_node_name, parent=parent_node) - if ref_obj_type == 'Column': + if ref_obj_type == "Column": child_node.custom_property = columnIcon + " '" + ref_obj_table_name + "'" - elif ref_obj_type == 'Table': + elif ref_obj_type == "Table": child_node.custom_property = tableIcon + " " - elif ref_obj_type == 'Measure': + elif ref_obj_type == "Measure": child_node.custom_property = measureIcon + " " # Update the dictionary with the child node @@ -81,4 +84,4 @@ def measure_dependency_tree(dataset: str, measure_name: str, workspace: Optional if tableIcon in node.custom_property: print(f"{pre}{node.custom_property}'{node.name}'") else: - print(f"{pre}{node.custom_property}[{node.name}]") \ No newline at end of file + print(f"{pre}{node.custom_property}[{node.name}]") diff --git a/sempy_labs/MigrateCalcTablesToLakehouse.py b/sempy_labs/MigrateCalcTablesToLakehouse.py deleted file mode 100644 index 67317272..00000000 --- a/sempy_labs/MigrateCalcTablesToLakehouse.py +++ /dev/null @@ -1,311 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import re, datetime, time -from .GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import resolve_lakehouse_name, resolve_lakehouse_id, create_abfss_path -from .TOM import connect_semantic_model -from pyspark.sql import SparkSession -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_calc_tables_to_lakehouse(dataset: str, 
new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) - else: - lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace) - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - #dfC['Column Object'] = "'" + dfC['Table Name'] + "'[" + dfC['Column Name'] + "]" - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[~dfP_filt['Query'].str.contains('NAMEOF')] #Remove field parameters - #dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] - lakeTables = get_lakehouse_tables(lakehouse, lakehouse_workspace) - - # Do not execute the function if lakehouse tables already exist with the same name - killFunction = False - for i, r in dfP_filt.iterrows(): - tName = r['Table Name'] - dtName = tName.replace(' ', '_') - - if dtName in lakeTables['Table Name'].values: - print(f"{red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse}' lakehouse in the '{workspace}' workspace.") - killFunction = True - - if killFunction: - return - - spark = SparkSession.builder.getOrCreate() - - if len(dfP_filt) == 0: - print(f"{yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: 
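# Retry-until-timeout pattern: the loop below keeps attempting to open the source model via
# connect_semantic_model, treating any exception as transient. It sleeps one second between
# attempts and gives up silently once the one-minute deadline set above has passed, presumably
# to ride out the brief window in which a freshly created or modified semantic model is not yet
# reachable. The same while-not-success loop recurs in the other migration helpers in this patch.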
- try: - with connect_semantic_model(dataset=dataset, workspace = workspace, readonly=True) as tom: - success = True - for t in tom.model.Tables: - if tom.is_auto_date_table(table_name = t.Name): - print(f"{yellow_dot} The '{t.Name}' table is an auto-datetime table and is not supported in the Direct Lake migration process. Please create a proper Date/Calendar table in your lakehoues and use it in your Direct Lake model.") - else: - for p in t.Partitions: - if str(p.SourceType) == 'Calculated': - query = p.Source.Expression - if 'NAMEOF' not in query: # exclude field parameters - daxQuery = '' - if query.lower().startswith('calendar') and any(str(c.Type) == 'Calculated' for c in t.Columns): - daxQuery = f"ADDCOLUMNS(\n{query}," - for c in t.Columns: - if str(c.Type) == 'Calculated': - expr = c.Expression - expr = expr.replace(f"'{t.Name}'",'').replace(f"{t.Name}[Date]",'[Date]') - expr = expr.replace('[MonthNo]','MONTH([Date])').replace('[QuarterNo]','INT((MONTH([Date]) + 2) / 3)') - daxQuery = f"{daxQuery}\n\"{c.Name}\",{expr}," - daxQuery = 'EVALUATE\n' + daxQuery.rstrip(',') + '\n)' - else: - daxQuery = f"EVALUATE\n{query}" - daxQueryTopN = daxQuery.replace('EVALUATE\n', 'EVALUATE\nTOPN(1,') + ')' - - try: - df = fabric.evaluate_dax(dataset = dataset, dax_string = daxQueryTopN, workspace = workspace) - - for col in df.columns: - pattern = r"\[([^\]]+)\]" - - matches = re.findall(pattern, col) - new_column_name = matches[0].replace(' ','') - - df.rename(columns={col: new_column_name}, inplace=True) - - try: - dataType = next(str(c.DataType) for c in tom.model.Tables[t.Name].Columns if str(c.Type) == 'CalculatedTableColumn' and c.SourceColumn == col) - except: - dataType = next(str(c.DataType) for c in tom.model.Tables[t.Name].Columns if str(c.Type) == 'Calculated' and c.Name == new_column_name) - - if dataType == 'Int64': - df[new_column_name] = df[new_column_name].astype(int) - elif dataType in ['Decimal', 'Double']: - df[new_column_name] = df[new_column_name].astype(float) - elif dataType == 'Boolean': - df[new_column_name] = df[new_column_name].astype(bool) - elif dataType == 'DateTime': - df[new_column_name] = pd.to_datetime(df[new_column_name]) - - delta_table_name = t.Name.replace(' ','_').lower() - - spark_df = spark.createDataFrame(df) - filePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = lakehouse_workspace_id, delta_table_name = delta_table_name) - spark_df.write.mode('overwrite').format('delta').save(filePath) - - start_time2 = datetime.datetime.now() - timeout2 = datetime.timedelta(minutes=1) - success2 = False - - while not success2: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom2: - success2 = True - tom2.set_annotation(object = tom2.model, name = t.Name, value = daxQuery) - except Exception as e: - if datetime.datetime.now() - start_time2 > timeout2: - break - time.sleep(1) - - print(f"{green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - except: - print(f"{red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) - -@log -def migrate_field_parameters(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Migrates field parameters from one semantic model 
to another. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - from .HelperFunctions import format_dax_object_name - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[dfP_filt['Query'].str.contains('NAMEOF')] # Only field parameters - dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] - - if len(dfP_filt) == 0: - print(f"{green_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no field parameters.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, workspace=new_dataset_workspace, readonly=False) as tom: - success = True - - for i,r in dfP_filt.iterrows(): - tName = r['Table Name'] - query = r['Query'] - - # For field parameters, remove calc columns from the query - rows = query.strip().split('\n') - filtered_rows = [row for row in rows if not any(value in row for value in dfC_CalcColumn['Column Object'].values)] - updated_query_string = '\n'.join(filtered_rows) - - # Remove extra comma - lines = updated_query_string.strip().split('\n') - lines[-2] = lines[-2].rstrip(',') - expr = '\n'.join(lines) - - try: - par = TOM.Partition() - par.Name = tName - - parSource = TOM.CalculatedPartitionSource() - par.Source = parSource - parSource.Expression = expr - - tbl = TOM.Table() - tbl.Name = tName - tbl.Partitions.Add(par) - - columns = ['Value1', 'Value2', 'Value3'] - - for colName in columns: - col = TOM.CalculatedTableColumn() - col.Name = colName - col.SourceColumn = '[' + colName + ']' - col.DataType = TOM.DataType.String - - tbl.Columns.Add(col) - - tom.model.Tables.Add(tbl) - - ep = TOM.JsonExtendedProperty() - ep.Name = 'ParameterMetadata' - ep.Value = '{"version":3,"kind":2}' - - rcd = TOM.RelatedColumnDetails() - gpc = TOM.GroupByColumn() - gpc.GroupingColumn = tom.model.Tables[tName].Columns['Value2'] - rcd.GroupByColumns.Add(gpc) - - # Update column properties - tom.model.Tables[tName].Columns['Value2'].IsHidden = True - tom.model.Tables[tName].Columns['Value3'].IsHidden = True - tom.model.Tables[tName].Columns['Value3'].DataType = TOM.DataType.Int64 - tom.model.Tables[tName].Columns['Value1'].SortByColumn = tom.model.Tables[tName].Columns['Value3'] - tom.model.Tables[tName].Columns['Value2'].SortByColumn = 
tom.model.Tables[tName].Columns['Value3'] - tom.model.Tables[tName].Columns['Value2'].ExtendedProperties.Add(ep) - tom.model.Tables[tName].Columns['Value1'].RelatedColumnDetails = rcd - - dfC_filt1 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value1]')] - col1 = dfC_filt1['Column Name'].iloc[0] - dfC_filt2 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value2]')] - col2 = dfC_filt2['Column Name'].iloc[0] - dfC_filt3 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value3]')] - col3 = dfC_filt3['Column Name'].iloc[0] - - tom.model.Tables[tName].Columns['Value1'].Name = col1 - tom.model.Tables[tName].Columns['Value2'].Name = col2 - tom.model.Tables[tName].Columns['Value3'].Name = col3 - - print(f"{green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace}' workspace.") - except: - print(f"{red_dot} The '{tName}' table has not been added as a field parameter.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateCalcTablesToSemanticModel.py b/sempy_labs/MigrateCalcTablesToSemanticModel.py deleted file mode 100644 index 6b7e04cb..00000000 --- a/sempy_labs/MigrateCalcTablesToSemanticModel.py +++ /dev/null @@ -1,123 +0,0 @@ -import sempy -import sempy.fabric as fabric -import re, datetime, time -from .GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import resolve_lakehouse_name -from .TOM import connect_semantic_model -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_calc_tables_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None ): - - """ - Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Get calc tables but not field parameters - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[~dfP_filt['Query'].str.contains('NAMEOF')] - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - lc = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) - # Get all calc table columns of calc tables not including field parameters - dfC_filt = dfC[(dfC['Table Name'].isin(dfP_filt['Table Name']))]# & (dfC['Type'] == 'CalculatedTableColumn')] - #dfA = list_annotations(new_dataset, new_dataset_workspace) - #dfA_filt = dfA[(dfA['Object Type'] == 'Model') & ~ (dfA['Annotation Value'].str.contains('NAMEOF'))] - - if len(dfP_filt) == 0: - print(f"{green_dot} The '{dataset}' semantic model has no calculated tables.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - for tName in dfC_filt['Table Name'].unique(): - if tName.lower() in lc['Table Name'].values: - - try: - tom.model.Tables[tName] - except: - tom.add_table(name = tName) - tom.add_entity_partition(table_name=tName, entity_name=tName.replace(' ','_').lower()) - - columns_in_table = dfC_filt.loc[dfC_filt['Table Name'] == tName, 'Column Name'].unique() - - for cName in columns_in_table: - scName = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Source'].iloc[0] - cDataType = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Data Type'].iloc[0] - cType = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Type'].iloc[0] - - #av = tom.get_annotation_value(object = tom.model, name = tName) - - #if cType == 'CalculatedTableColumn': - #lakeColumn = scName.replace(' ','_') - #elif cType == 'Calculated': - pattern = r'\[([^]]+)\]' - - matches = re.findall(pattern, scName) - lakeColumn = matches[0].replace(' ','') - try: - tom.model.Tables[tName].Columns[cName] - except: - tom.add_data_column(table_name = tName, column_name=cName, source_column=lakeColumn, data_type=cDataType) - print(f"{green_dot} The '{tName}'[{cName}] column has been added.") - - print(f"\n{green_dot} All viable calculated tables have been added to the model.") - - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateModelObjectsToSemanticModel.py b/sempy_labs/MigrateModelObjectsToSemanticModel.py deleted file mode 100644 index aa984255..00000000 --- a/sempy_labs/MigrateModelObjectsToSemanticModel.py +++ /dev/null @@ -1,324 +0,0 @@ -import sempy -import sempy.fabric as fabric -import re, datetime, time -from .ListFunctions import list_tables -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model -from typing import List, 
Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_model_objects_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - import System - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspaceId = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - dfT = list_tables(dataset, workspace) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - dfRole = fabric.get_roles(dataset = dataset, workspace = workspace) - dfRLS = fabric.get_row_level_security_permissions(dataset = dataset, workspace = workspace) - dfCI = fabric.list_calculation_items(dataset = dataset, workspace = workspace) - dfP = fabric.list_perspectives(dataset = dataset, workspace = workspace) - dfTranslation = fabric.list_translations(dataset = dataset, workspace = workspace) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfPar = fabric.list_partitions(dataset = dataset, workspace = workspace) - - dfP_cc = dfPar[(dfPar['Source Type'] == 'Calculated')] - dfP_fp = dfP_cc[dfP_cc['Query'].str.contains('NAMEOF')] - dfC_fp = dfC[dfC['Table Name'].isin(dfP_fp['Table Name'].values)] - - print(f"{in_progress} Updating '{new_dataset}' based on '{dataset}'...") - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - - isDirectLake = any(str(p.Mode) == 'DirectLake' for t in tom.model.Tables for p in t.Partitions) - - print(f"\n{in_progress} Updating table properties...") - for t in tom.model.Tables: - t.IsHidden = bool(dfT.loc[dfT['Name'] == t.Name, 'Hidden'].iloc[0]) - t.Description = dfT.loc[dfT['Name'] == t.Name, 'Description'].iloc[0] - t.DataCategory = dfT.loc[dfT['Name'] == t.Name, 'Data Category'].iloc[0] - - print(f"{green_dot} The '{t.Name}' table's properties have been updated.") - - print(f"\n{in_progress} Updating column properties...") - for t in tom.model.Tables: - if t.Name not in dfP_fp['Table Name'].values: # do not 
include field parameters - dfT_filtered = dfT[dfT['Name'] == t.Name] - tType = dfT_filtered['Type'].iloc[0] - for c in t.Columns: - if not c.Name.startswith('RowNumber-'): - dfC_filt = dfC[(dfC['Table Name'] == t.Name) & (dfC['Column Name'] == c.Name)] - cName = dfC_filt['Column Name'].iloc[0] - c.Name = cName - if tType == 'Table': - c.SourceColumn = cName.replace(' ', '_') - c.IsHidden = bool(dfC_filt['Hidden'].iloc[0]) - c.DataType = System.Enum.Parse(TOM.DataType, dfC_filt['Data Type'].iloc[0]) - c.DisplayFolder = dfC_filt['Display Folder'].iloc[0] - c.FormatString = dfC_filt['Format String'].iloc[0] - c.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, dfC_filt['Summarize By'].iloc[0]) - c.DataCategory = dfC_filt['Data Category'].iloc[0] - c.IsKey = bool(dfC_filt['Key'].iloc[0]) - sbc = dfC_filt['Sort By Column'].iloc[0] - - if sbc != None: - try: - c.SortByColumn = tom.model.Tables[t.Name].Columns[sbc] - except: - print(f"{red_dot} Failed to create '{sbc}' as a Sort By Column for the '{c.Name}' in the '{t.Name}' table.") - print(f"{green_dot} The '{t.Name}'[{c.Name}] column's properties have been updated.") - - print(f"\n{in_progress} Creating hierarchies...") - dfH_grouped = dfH.groupby(['Table Name', 'Hierarchy Name', 'Hierarchy Hidden', 'Hierarchy Description']).agg({'Level Name': list, 'Column Name': list}).reset_index() - - for i, r in dfH_grouped.iterrows(): - tName = r['Table Name'] - hName = r['Hierarchy Name'] - hDesc = r['Hierarchy Description'] - hHid = bool(r['Hierarchy Hidden']) - cols = r['Column Name'] - lvls = r['Level Name'] - - try: - tom.model.Tables[tName].Hierarchies[hName] - except: - tom.add_hierarchy(table_name = tName, hierarchy_name=hName, hierarchy_description=hDesc, hierarchy_hidden=hHid, columns=cols, levels=lvls) - print(f"{green_dot} The '{hName}' hierarchy has been added.") - - print(f"\n{in_progress} Creating measures...") - for i, r in dfM.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - mExpr = r['Measure Expression'] - mHidden = bool(r['Measure Hidden']) - mDF = r['Measure Display Folder'] - mDesc = r['Measure Description'] - mFS = r['Format String'] - - try: - tom.model.Tables[tName].Measures[mName] - except: - tom.add_measure(table_name = tName, measure_name=mName, expression=mExpr, hidden=mHidden, display_folder=mDF, description=mDesc, format_string=mFS) - print(f"{green_dot} The '{mName}' measure has been added.") - - for cgName in dfCI['Calculation Group Name'].unique(): - - isHidden = bool(dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Hidden'].iloc[0]) - prec = int(dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Precedence'].iloc[0]) - desc = dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Description'].iloc[0] - - try: - tom.model.Tables[cgName] - except: - tom.add_calculation_group(name = cgName, description = desc, precedence=prec, hidden=isHidden) - print(f"{green_dot} The '{cgName}' calculation group has been added.") - tom.model.DiscourageImplicitMeasures = True - - print(f"\n{in_progress} Updating calculation group column name...") - dfC_filt = dfC[(dfC['Table Name'] == cgName) & (dfC['Hidden'] == False)] - colName = dfC_filt['Column Name'].iloc[0] - tom.model.Tables[cgName].Columns['Name'].Name = colName - - calcItems = dfCI.loc[dfCI['Calculation Group Name'] == cgName, 'Calculation Item Name'].unique() - - print(f"\n{in_progress} Creating calculation items...") - for calcItem in calcItems: - ordinal = int(dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] 
== calcItem), 'Ordinal'].iloc[0]) - expr = dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] == calcItem), 'Expression'].iloc[0] - fse = dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] == calcItem), 'Format String Expression'].iloc[0] - try: - tom.model.Tables[cgName].CalculationGroup.CalculationItems[calcItem] - except: - tom.add_calculation_item(table_name = cgName, calculation_item_name=calcItem, expression=expr, format_string_expression=fse, ordinal=ordinal) - print(f"{green_dot} The '{calcItem}' has been added to the '{cgName}' calculation group.") - - print(f"\n{in_progress} Creating relationships...") - for index, row in dfR.iterrows(): - fromTable = row['From Table'] - fromColumn = row['From Column'] - toTable = row['To Table'] - toColumn = row['To Column'] - isActive = row['Active'] - cfb = row['Cross Filtering Behavior'] - sfb = row['Security Filtering Behavior'] - rori = row['Rely On Referential Integrity'] - mult = row['Multiplicity'] - - card_mapping = {'m': 'Many', '1': 'One', '0': 'None'} - - fromCard = card_mapping.get(mult[0]) - toCard = card_mapping.get(mult[-1]) - - relName = create_relationship_name(fromTable,fromColumn,toTable,toColumn) - - if any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} already exists as a relationship in the semantic model.") - elif isDirectLake and any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn and (r.FromColumn.DataType == 'DateTime' or r.ToColumn.DataType == 'DateTime') for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} was not created since relationships based on DateTime columns are not supported.") - elif isDirectLake and any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn and (r.FromColumn.DataType != r.ToColumn.DataType) for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} was not created since columns used in a relationship must have the same data type.") - else: - try: - tom.add_relationship( - from_table = fromTable, from_column=fromColumn, - to_table=toTable, to_column=toColumn, - from_cardinality=fromCard,to_cardinality=toCard, - cross_filtering_behavior=cfb, - security_filtering_behavior=sfb, - rely_on_referential_integrity=rori, - is_active=isActive) - - print(f"{green_dot} The {relName} relationship has been added.") - except: - print(f"{red_dot} The {relName} relationship was not added.") - - print(f"\n{in_progress} Creating roles...") - for index, row in dfRole.iterrows(): - roleName = row['Role'] - roleDesc = row['Description'] - modPerm = row['Model Permission'] - - try: - tom.model.Roles[roleName] - except: - tom.add_role(role_name=roleName, model_permission=modPerm, description=roleDesc) - print(f"{green_dot} The '{roleName}' role has been added.") - - print(f"\n{in_progress} Creating row level security...") - for index, row in dfRLS.iterrows(): - roleName = row['Role'] - tName = row['Table'] - expr = row['Filter Expression'] - - try: - tom.set_rls(role_name=roleName, table_name=tName, filter_expression=expr) - print(f"{green_dot} Row level security for the '{tName}' table within the '{roleName}' role has been set.") - except: - print(f"{red_dot} Row level security for the '{tName}' table within the '{roleName}' 
role was not set.") - - print(f"\n{in_progress} Creating perspectives...") - for pName in dfP['Perspective Name'].unique(): - - try: - tom.model.Perspectives[pName] - except: - tom.add_perspective(perspective_name=pName) - print(f"{green_dot} The '{pName}' perspective has been added.") - - print(f"\n{in_progress} Adding objects to perspectives...") - for index, row in dfP.iterrows(): - pName = row['Perspective Name'] - tName = row['Table Name'] - oName = row['Object Name'] - oType = row['Object Type'] - tType = dfT.loc[(dfT['Name'] == tName), 'Type'].iloc[0] - - try: - if oType == 'Table': - tom.add_to_perspective(object = tom.model.Tables[tName], perspective_name=pName) - elif oType == 'Column': - tom.add_to_perspective(object = tom.model.Tables[tName].Columns[oName], perspective_name=pName) - elif oType == 'Measure': - tom.add_to_perspective(object = tom.model.Tables[tName].Measures[oName], perspective_name=pName) - elif oType == 'Hierarchy': - tom.add_to_perspective(object = tom.model.Tables[tName].Hierarchies[oName], perspective_name=pName) - except: - pass - - print(f"\n{in_progress} Creating translation languages...") - for trName in dfTranslation['Culture Name'].unique(): - try: - tom.model.Cultures[trName] - except: - tom.add_translation(trName) - print(f"{green_dot} The '{trName}' translation language has been added.") - - print(f"\n{in_progress} Creating translation values...") - for index, row in dfTranslation.iterrows(): - trName = row['Culture Name'] - tName = row['Table Name'] - oName = row['Object Name'] - oType = row['Object Type'] - translation = row['Translation'] - prop = row['Property'] - - if prop == 'Caption': - prop = 'Name' - elif prop == 'DisplayFolder': - prop = 'Display Folder' - - try: - if oType == 'Table': - tom.set_translation(object = tom.model.Tables[tName], language=trName, property = prop, value = translation) - elif oType == 'Column': - tom.set_translation(object = tom.model.Tables[tName].Columns[oName], language=trName, property = prop, value = translation) - elif oType == 'Measure': - tom.set_translation(object = tom.model.Tables[tName].Measures[oName], language=trName, property = prop, value = translation) - elif oType == 'Hierarchy': - tom.set_translation(object = tom.model.Tables[tName].Hierarchies[oName], language=trName, property = prop, value = translation) - elif oType == 'Level': - - pattern = r'\[([^]]+)\]' - matches = re.findall(pattern, oName) - lName = matches[0] - - pattern = r"'([^']+)'" - matches = re.findall(pattern, oName) - hName = matches[0] - tom.set_translation(object = tom.model.Tables[tName].Hierarchies[hName].Levels[lName], language=trName, property = prop, value = translation) - except: - pass - - print(f"\n{green_dot} Migration of objects from '{dataset}' -> '{new_dataset}' is complete.") - - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateTablesColumnsToSemanticModel.py b/sempy_labs/MigrateTablesColumnsToSemanticModel.py deleted file mode 100644 index 47f5d054..00000000 --- a/sempy_labs/MigrateTablesColumnsToSemanticModel.py +++ /dev/null @@ -1,135 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import datetime, time -from .ListFunctions import list_tables -from .GetSharedExpression import get_shared_expression -from .HelperFunctions import resolve_lakehouse_name -from .Lakehouse import lakehouse_attached -from .TOM import connect_semantic_model -from typing import List, Optional, Union 
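# Note on the control flow shared by the migration modules removed in this
# patch: writes to the target model are wrapped in a retry-with-timeout loop,
# presumably to tolerate transient failures while the newly created model
# becomes available. A minimal, self-contained sketch of that idiom follows;
# retry_until_timeout and do_work are illustrative names only, not part of the
# library API.
import datetime
import time


def retry_until_timeout(do_work, timeout_minutes: int = 1) -> bool:
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(minutes=timeout_minutes)
    success = False
    while not success:
        try:
            do_work()  # e.g. open connect_semantic_model(...) and apply the changes
            success = True
        except Exception:
            # Stop retrying once the timeout is exceeded; otherwise wait and try again.
            if datetime.datetime.now() - start_time > timeout:
                break
            time.sleep(1)
    return success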
-from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_tables_columns_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Check that lakehouse is attached to the notebook - lakeAttach = lakehouse_attached() - - # Run if lakehouse is attached to the notebook or a lakehouse & lakehouse workspace are specified - if lakeAttach or (lakehouse is not None and lakehouse_workspace is not None): - shEx = get_shared_expression(lakehouse, lakehouse_workspace) - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfT = list_tables(dataset, workspace) - dfT.rename(columns={'Type': 'Table Type'}, inplace=True) - dfC = pd.merge(dfC, dfT[['Name', 'Table Type']], left_on = 'Table Name', right_on = 'Name', how='left') - dfT_filt = dfT[dfT['Table Type'] == 'Table'] - dfC_filt = dfC[(dfC['Table Type'] == 'Table') & ~(dfC['Column Name'].str.startswith('RowNumber-')) & (dfC['Type'] != 'Calculated')] - - print(f"{in_progress} Updating '{new_dataset}' based on '{dataset}'...") - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - try: - tom.model.Expressions['DatabaseQuery'] - except: - tom.add_expression('DatabaseQuery', expression = shEx) - print(f"{green_dot} The 'DatabaseQuery' expression has been added.") - - for i, r in dfT_filt.iterrows(): - tName = r['Name'] - tDC = r['Data Category'] - tHid = bool(r['Hidden']) - tDesc = r['Description'] 
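# The block below uses a try/except lookup as an existence check: indexing
# tom.model.Tables[tName] raises if the table is not yet in the model, and only
# then is the table (and its entity partition) added. A minimal sketch of the
# same idiom, assuming a TOM wrapper like the one returned by
# connect_semantic_model; ensure_table is a hypothetical helper, not a library
# function.
def ensure_table(tom, table_name: str) -> None:
    try:
        tom.model.Tables[table_name]  # raises when the table does not exist
    except Exception:
        tom.add_table(name=table_name)  # create it only when the lookup fails
        tom.add_entity_partition(
            table_name=table_name, entity_name=table_name.replace(" ", "_")
        )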
- - try: - tom.model.Tables[tName] - except: - tom.add_table(name = tName, description=tDesc, data_category=tDC, hidden=tHid) - tom.add_entity_partition(table_name = tName, entity_name = tName.replace(' ','_')) - print(f"{green_dot} The '{tName}' table has been added.") - - for i, r in dfC_filt.iterrows(): - tName = r['Table Name'] - cName = r['Column Name'] - scName = r['Source'].replace(' ','_') - cHid = bool(r['Hidden']) - cDataType = r['Data Type'] - - try: - tom.model.Tables[tName].Columns[cName] - except: - tom.add_data_column(table_name=tName, column_name=cName, source_column=scName, hidden=cHid, data_type=cDataType) - print(f"{green_dot} The '{tName}'[{cName}] column has been added.") - - print(f"\n{green_dot} All regular tables and columns have been added to the '{new_dataset}' semantic model.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) - else: - print(f"{red_dot} Lakehouse not attached to notebook and lakehouse/lakehouse_workspace are not specified. Please add your lakehouse to this notebook or specify the lakehouse/lakehouse_workspace parameters.") - print(f"To attach a lakehouse to a notebook, go to the the 'Explorer' window to the left, click 'Lakehouses' to add your lakehouse to this notebook") - print(f"\nLearn more here: https://learn.microsoft.com/fabric/data-engineering/lakehouse-notebook-explore#add-or-remove-a-lakehouse") - - - - - \ No newline at end of file diff --git a/sempy_labs/MigrationValidation.py b/sempy_labs/MigrationValidation.py deleted file mode 100644 index 150f7f78..00000000 --- a/sempy_labs/MigrationValidation.py +++ /dev/null @@ -1,133 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model -from typing import List, Optional, Union -from sempy._utils._log import log - -def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None): - - """ - Shows a list of semantic model objects. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
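The deleted migration modules are meant to be used together: tables and columns are copied first, and the helpers in this file (list_semantic_model_objects and migration_validation, defined below) then report how much of the source model made it across. A hedged usage sketch, assuming the pre-refactor module paths shown in this patch; the model names are placeholders:

from sempy_labs.MigrateTablesColumnsToSemanticModel import migrate_tables_columns_to_semantic_model
from sempy_labs.MigrationValidation import migration_validation

# Copy regular tables/columns from the import model into the Direct Lake model.
migrate_tables_columns_to_semantic_model(dataset="Sales Import", new_dataset="Sales Direct Lake")

# Compare the two models object by object; prints the percentage migrated and
# returns a dataframe with a 'Migrated' flag per object.
df_check = migration_validation(dataset="Sales Import", new_dataset="Sales Direct Lake")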
- - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing a list of objects in the semantic model - """ - - if workspace is None: - workspace = fabric.resolve_workspace_name() - - df = pd.DataFrame(columns=['Parent Name', 'Object Name', 'Object Type']) - with connect_semantic_model(dataset=dataset, workspace = workspace, readonly=True) as tom: - for t in tom.model.Tables: - if t.CalculationGroup is not None: - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': 'Calculation Group'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for ci in t.CalculationGroup.CalculationItems: - new_data = {'Parent Name': t.Name, 'Object Name': ci.Name, 'Object Type': str(ci.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - elif any(str(p.SourceType) == 'Calculated' for p in t.Partitions): - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': 'Calculated Table'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': str(t.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for c in t.Columns: - if str(c.Type) != 'RowNumber': - if str(c.Type) == 'Calculated': - new_data = {'Parent Name': c.Parent.Name, 'Object Name': c.Name, 'Object Type': 'Calculated Column'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - new_data = {'Parent Name': c.Parent.Name, 'Object Name': c.Name, 'Object Type': str(c.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for m in t.Measures: - new_data = {'Parent Name': m.Parent.Name, 'Object Name': m.Name, 'Object Type': str(m.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for h in t.Hierarchies: - new_data = {'Parent Name': h.Parent.Name, 'Object Name': h.Name, 'Object Type': str(h.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for l in h.Levels: - new_data = {'Parent Name': l.Parent.Name, 'Object Name': l.Name, 'Object Type': str(l.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for p in t.Partitions: - new_data = {'Parent Name': p.Parent.Name, 'Object Name': p.Name, 'Object Type': str(p.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for r in tom.model.Relationships: - rName = create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name) - new_data = {'Parent Name': r.Parent.Name, 'Object Name': rName, 'Object Type': str(r.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for role in tom.model.Roles: - new_data = {'Parent Name': role.Parent.Name, 'Object Name': role.Name, 'Object Type': str(role.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for rls in role.TablePermissions: - new_data = {'Parent Name': role.Name, 'Object Name': rls.Name, 'Object Type': str(rls.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for tr in tom.model.Cultures: - new_data = {'Parent Name': tr.Parent.Name, 'Object Name': tr.Name, 'Object Type': str(tr.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for per in tom.model.Perspectives: - new_data = {'Parent Name': 
per.Parent.Name, 'Object Name': per.Name, 'Object Type': str(per.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - -@log -def migration_validation(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Shows the objects in the original semantic model and whether then were migrated successfully or not. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. - """ - - dfA = list_semantic_model_objects(dataset = dataset, workspace = workspace) - dfB = list_semantic_model_objects(dataset = new_dataset, workspace = new_dataset_workspace) - - def is_migrated(row): - if row['Object Type'] == 'Calculated Table': - return ((dfB['Parent Name'] == row['Parent Name']) & - (dfB['Object Name'] == row['Object Name']) & - (dfB['Object Type'].isin(['Calculated Table', 'Table']))).any() - else: - return ((dfB['Parent Name'] == row['Parent Name']) & - (dfB['Object Name'] == row['Object Name']) & - (dfB['Object Type'] == row['Object Type'])).any() - - dfA['Migrated'] = dfA.apply(is_migrated, axis=1) - - denom = len(dfA) - num = len(dfA[dfA['Migrated']]) - print(f"{100 * round(num / denom,2)}% migrated") - - return dfA \ No newline at end of file diff --git a/sempy_labs/ModelAutoBuild.py b/sempy_labs/ModelAutoBuild.py index 6497061c..befa151a 100644 --- a/sempy_labs/ModelAutoBuild.py +++ b/sempy_labs/ModelAutoBuild.py @@ -2,14 +2,20 @@ import sempy.fabric as fabric import pandas as pd from .TOM import connect_semantic_model -from .CreateBlankSemanticModel import create_blank_semantic_model -from .GetSharedExpression import get_shared_expression +from ._create_blank_semantic_model import create_blank_semantic_model +from .directlake.GetSharedExpression import get_shared_expression from typing import List, Optional, Union from sempy._utils._log import log -@log -def model_auto_build(dataset: str, file_path: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +@log +def model_auto_build( + dataset: str, + file_path: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Dynamically generates a semantic model based on an Excel file template. @@ -29,10 +35,10 @@ def model_auto_build(dataset: str, file_path: str, workspace: Optional[str] = No The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
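Since model_auto_build is driven entirely by an Excel template, a short usage sketch may help; the sheet names it expects (Model, Tables, Measures, Columns, Roles, Hierarchies, Relationships) come from the sheets list later in this function, and the model name and file path below are placeholders, not values from this patch:

from sempy_labs.ModelAutoBuild import model_auto_build

# The workbook at file_path is expected to contain one sheet per object type:
# Model, Tables, Measures, Columns, Roles, Hierarchies and Relationships.
model_auto_build(
    dataset="Sales Model",  # name of the semantic model to create
    file_path="/lakehouse/default/Files/model_template.xlsx",  # placeholder path
    lakehouse=None,  # optional; see the parameter descriptions above for how None is resolved
    lakehouse_workspace=None,
)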
- + Returns ------- - + """ if workspace is None: @@ -42,76 +48,96 @@ def model_auto_build(dataset: str, file_path: str, workspace: Optional[str] = No if lakehouse_workspace is None: lakehouse_workspace = workspace - sheets = ['Model', 'Tables', 'Measures', 'Columns', 'Roles', 'Hierarchies', 'Relationships'] + sheets = [ + "Model", + "Tables", + "Measures", + "Columns", + "Roles", + "Hierarchies", + "Relationships", + ] - create_blank_semantic_model(dataset=dataset, workspace = workspace) + create_blank_semantic_model(dataset=dataset, workspace=workspace) - with connect_semantic_model(dataset = dataset, workspace = workspace) as tom: + with connect_semantic_model(dataset=dataset, workspace=workspace) as tom: - #DL Only + # DL Only expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace) - tom.add_expression(name = 'DatbaseQuery', expression = expr) + tom.add_expression(name="DatbaseQuery", expression=expr) for sheet in sheets: - df = pd.read_excel(file_path, sheet_name= sheet) + df = pd.read_excel(file_path, sheet_name=sheet) - if sheet == 'Tables': + if sheet == "Tables": for i, r in df.iterrows(): - tName = r['Table Name'] - desc = r['Description'] - dc = r['Data Category'] - mode = r['Mode'] - hidden = bool(r['Hidden']) - - tom.add_table(name = tName, description = desc, data_category=dc, hidden = hidden) - if mode == 'DirectLake': - tom.add_entity_partition(table_name = tName, entity_name=tName) - elif sheet == 'Columns': + tName = r["Table Name"] + desc = r["Description"] + dc = r["Data Category"] + mode = r["Mode"] + hidden = bool(r["Hidden"]) + + tom.add_table( + name=tName, description=desc, data_category=dc, hidden=hidden + ) + if mode == "DirectLake": + tom.add_entity_partition(table_name=tName, entity_name=tName) + elif sheet == "Columns": for i, r in df.iterrows(): - tName = r['Table Name'] - cName = r['Column Name'] - scName = r['Source Column'] - dataType = r['Data Type'] - hidden = bool(r['Hidden']) - key = bool(r['Key']) - if dataType == 'Integer': - dataType = 'Int64' - desc = r['Description'] + tName = r["Table Name"] + cName = r["Column Name"] + scName = r["Source Column"] + dataType = r["Data Type"] + hidden = bool(r["Hidden"]) + key = bool(r["Key"]) + if dataType == "Integer": + dataType = "Int64" + desc = r["Description"] tom.add_data_column( - table_name = tName, column_name=cName, source_column=scName, - data_type=dataType, description = desc, hidden=hidden, key=key) - elif sheet == 'Measures': + table_name=tName, + column_name=cName, + source_column=scName, + data_type=dataType, + description=desc, + hidden=hidden, + key=key, + ) + elif sheet == "Measures": for i, r in df.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - expr = r['Expression'] - desc = r['Description'] - format = r['Format String'] - hidden = bool(r['Hidden']) + tName = r["Table Name"] + mName = r["Measure Name"] + expr = r["Expression"] + desc = r["Description"] + format = r["Format String"] + hidden = bool(r["Hidden"]) tom.add_measure( - table_name = tName, measure_name=mName, - expression=expr, format_string=format, description=desc, hidden=hidden) - elif sheet == 'Relationships': + table_name=tName, + measure_name=mName, + expression=expr, + format_string=format, + description=desc, + hidden=hidden, + ) + elif sheet == "Relationships": for i, r in df.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - fromCard = r['From Cardinality'] - toCard = r['To Cardinality'] + fromTable 
= r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + fromCard = r["From Cardinality"] + toCard = r["To Cardinality"] tom.add_relationship( - from_table=fromTable, from_column= fromColumn, - to_table=toTable, to_column = toColumn, - from_cardinality=fromCard, to_cardinality=toCard) - elif sheet == 'Roles': - print('hi') - elif sheet == 'Hierarchies': - print('hi') - - - - - \ No newline at end of file + from_table=fromTable, + from_column=fromColumn, + to_table=toTable, + to_column=toColumn, + from_cardinality=fromCard, + to_cardinality=toCard, + ) + elif sheet == "Roles": + print("hi") + elif sheet == "Hierarchies": + print("hi") diff --git a/sempy_labs/ModelBPA.py b/sempy_labs/ModelBPA.py index 89f2ff97..9059c07f 100644 --- a/sempy_labs/ModelBPA.py +++ b/sempy_labs/ModelBPA.py @@ -6,309 +6,709 @@ from IPython.display import display, HTML from pyspark.sql import SparkSession from .GetMeasureDependencies import get_measure_dependencies -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name -from .Lakehouse import lakehouse_attached -from .GetLakehouseTables import get_lakehouse_tables +from ._helper_functions import format_dax_object_name, resolve_lakehouse_name +from .lakehouse.Lakehouse import lakehouse_attached +from .lakehouse.GetLakehouseTables import get_lakehouse_tables from typing import List, Optional, Union from sempy._utils._log import log -def model_bpa_rules(): - """ +def model_bpa_rules(): + """ Shows the default rules for the semantic model BPA used by the run_model_bpa function. Parameters ---------- - + Returns ------- pandas.DataFrame A pandas dataframe containing the default rules for the run_model_bpa function. """ - df_rules = pd.DataFrame([ - ('Performance', 'Column', 'Warning', 'Do not use floating point data types', - lambda df: df['Data Type'] == 'Double', - 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).', - ), - ('Performance', 'Column', 'Warning', 'Avoid using calculated columns', - lambda df: df['Type'] == 'Calculated', - 'Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('Performance', 'Relationship', 'Warning', 'Check if bi-directional and many-to-many relationships are valid', - lambda df: (df['Multiplicity'] == 'm:m') | (df['Cross Filtering Behavior'] == 'BothDirections'), - 'Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.', - 'https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax' - ), - ('Performance', 'Row Level Security', 'Info', 'Check if dynamic row level security (RLS) is necessary', - lambda df: df['Is Dynamic'], - 'Usage of dynamic row level security (RLS) can add memory and performance overhead. 
Please research the pros/cons of using it.', - 'https://docs.microsoft.com/power-bi/admin/service-admin-rls', - ), - ('Performance', 'Table', 'Warning', 'Avoid using many-to-many relationships on tables used for dynamic row level security', - lambda df: (df['Used in M2M Relationship'] == True) & (df['Used in Dynamic RLS'] == True), - "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.", - 'https://www.elegantbi.com/post/dynamicrlspatterns', - ), - ('Performance', 'Relationship', 'Warning', 'Many-to-many relationships should be single-direction', - lambda df: (df['Multiplicity'] == 'm:m') & (df['Cross Filtering Behavior'] == 'BothDirections'), - ), - ('Performance', 'Column', 'Warning', 'Set IsAvailableInMdx to false on non-attribute columns', - lambda df: (df['Is Direct Lake'] == False) & (df['Is Available in MDX'] == True) & ((df['Hidden'] == True) | (df['Parent Is Hidden'] == True)) & (df['Used in Sort By'] == False) & (df['Used in Hierarchy'] == False) & (df['Sort By Column'] == None), - 'To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.', - 'https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular', - ), - #('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table", - # lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']), - # "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.", - # "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions", - #), - ('Performance', 'Table', 'Warning', "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables", - lambda df: (df['Import Partitions'] == 1) & (df['Model Has DQ']) & (df['Used in Relationship x:1']), - "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting", - - ), - ('Performance', 'Partition', 'Warning', 'Minimize Power Query transformations', - lambda df: (df['Source Type'] == 'M') & (('Table.Combine(\"' in df['Query']) | ('Table.Join(\"' in df['Query']) | ('Table.NestedJoin(\"' in df['Query']) | ('Table.AddColumn(\"' in df['Query']) | ('Table.Group(\"' in df['Query']) | ('Table.Sort(\"' in df['Query']) | ('Table.Sort(\"' in df['Query']) | ('Table.Pivot(\"' in df['Query']) | ('Table.Unpivot(\"' in df['Query']) | ('Table.UnpivotOtherColumns(\"' in df['Query']) | ('Table.Distinct(\"' in df['Query']) | ('[Query=(\"\"SELECT' in df['Query']) | ('Value.NativeQuery' in df['Query']) | ('OleDb.Query' in df['Query']) | ('Odbc.Query' in df['Query']) ), - 'Minimize Power Query 
transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.', - 'https://docs.microsoft.com/power-query/power-query-folding', - ), - ('Performance', 'Table', 'Warning', 'Consider a star-schema instead of a snowflake architecture', - lambda df: (df['Type'] != 'Calculation Group') & df['Used in Relationship Both Sides'], - 'Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.', - 'https://docs.microsoft.com/power-bi/guidance/star-schema', - ), - ('Performance', 'Table', 'Warning', 'Reduce usage of calculated tables', - lambda df: df['Type'] == 'Calculated Table', - 'Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.', - ), - ('Performance', 'Column', 'Warning', 'Reduce usage of calculated columns that use the RELATED function', - lambda df: (df['Type'] == 'Calculated') & (df['Source'].str.contains(r'related\s*\(', case=False)), - 'Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.', - 'https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables', - ), - ('Performance', 'Model', 'Warning', 'Avoid excessive bi-directional or many-to-many relationships', - lambda df: (df['M2M or BiDi Relationship Count'] / df['Relationship Count']) > 0.3, - 'Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.', - 'https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax', - ), - ('Performance', 'Column', 'Warning', 'Avoid bi-directional or many-to-many relationships against high-cardinality columns', - lambda df: df['Used in M2M/BiDi Relationship'] & df['Column Cardinality'] > 100000, - 'For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns', - ), - ('Performance', 'Table', 'Warning', 'Remove auto-date table', - lambda df: (df['Type'] == 'Calculated Table') & ( (df['Name'].str.startswith('DateTableTemplate_')) | (df['Name'].str.startswith('LocalDateTable_')) ), - 'Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. 
This will save memory resources.', - 'https://www.youtube.com/watch?v=xu3uDEHtCrg', - ), - ('Performance', 'Table', 'Warning', 'Date/calendar tables should be marked as a date table', - lambda df: ( (df['Name'].str.contains(r'date', case=False)) | (df['Name'].str.contains(r'calendar', case=False)) ) & (df['Data Category'] != 'Time'), - "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.", - 'https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables', - ), - ('Performance', 'Table', 'Warning', 'Large tables should be partitioned', - lambda df: (df['Is Direct Lake'] == False) & (df['Partition Count'] == 1) & (df['Row Count'] > 25000000), - 'Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.', - ), - ('Performance', 'Row Level Security', 'Warning', 'Limit row level security (RLS) logic', - lambda df: df['Filter Expression'].str.contains('|'.join(['right', 'left', 'filter', 'upper', 'lower', 'find' ]), case=False), - 'Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).', - ), - ('Performance', 'Model', 'Warning', 'Model should have a date table', - lambda df: df['Has Date Table'], - 'Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.', - ), - ('Performance', 'Measure', 'Warning', 'Measures using time intelligence and model is using Direct Query', - lambda df: df['DQ Date Function Used'], - 'At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.', - ), - ('Error Prevention', 'Calculation Item', 'Error', 'Calculation items must have an expression', - lambda df: df['Expression'].str.len() == 0, - 'Calculation items must have an expression. Without an expression, they will not show any values.', - ), - ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy', 'Partition'], 'Error', 'Avoid invalid characters in names', - lambda df: df['Name'].apply(lambda x: any(unicodedata.category(char) == 'Cc' and not char.isspace() for char in x)), - 'This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.', - ), - ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy'], 'Error', 'Avoid invalid characters in descriptions', - lambda df: df['Description'].apply(lambda x: any(unicodedata.category(char) == 'Cc' and not char.isspace() for char in x)), - 'This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). 
This rule has a fix expression which converts the invalid character into a space, resolving the issue.', - ), - ('Error Prevention', 'Relationship', 'Warning', 'Relationship columns should be of the same data type', - lambda df: df['From Column Data Type'] != df['To Column Data Type'], - "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.", - ), - ('Error Prevention', 'Column', 'Error', 'Data columns must have a source column', - lambda df: (df['Type'] == 'Data') & (df['Source'].str.len() == 0), - 'Data columns must have a source column. A data column without a source column will cause an error when processing the model.', - ), - ('Error Prevention', 'Column', 'Warning', 'Set IsAvailableInMdx to true on necessary columns', - lambda df: (df['Is Direct Lake'] == False) & (df['Is Available in MDX'] == False) & ((df['Used in Sort By'] == True) | (df['Used in Hierarchy'] == True) | (df['Sort By Column'] != None)), - 'In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.', - ), - ('Error Prevention', 'Table', 'Error', 'Avoid the USERELATIONSHIP function and RLS against the same table', - lambda df: (df['USERELATIONSHIP Used'] == True) & (df['Used in RLS'] == True), - "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.", - 'https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Avoid using the IFERROR function', - lambda df: df['Measure Expression'].str.contains(r'irerror\s*\(', case=False), - 'Avoid using the IFERROR function as it may cause performance degradation. 
If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Use the TREATAS function instead of INTERSECT for virtual relationships', - lambda df: df['Measure Expression'].str.contains(r'intersect\s*\(', case=False), - 'The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.', - 'https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax', - ), - ('DAX Expressions', 'Measure', 'Warning', 'The EVALUATEANDLOG function should not be used in production models', - lambda df: df['Measure Expression'].str.contains(r'evaluateandlog\s*\(', case=False), - 'The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.', - 'https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Measures should not be direct references of other measures', - lambda df: df['Measure Expression'].str.strip().isin(df['Measure Object']), - "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.", - ), - ('DAX Expressions', 'Measure', 'Warning', 'No two measures should have the same definition', - lambda df: df['Measure Expression'].apply(lambda x: re.sub(r'\s+', '', x)).duplicated(keep=False), - 'Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Avoid addition or subtraction of constant values to results of divisions', - lambda df: df["Measure Expression"].str.contains("(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1" or "\\/\\s*.*(?=[-+]\\s*1)", regex=True), - ), - ('DAX Expressions', 'Measure', 'Warning', "Avoid using '1-(x/y)' syntax", - lambda df: df['Measure Expression'].str.contains("[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/" or '[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(', regex=True), - "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. 
Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )", - ), - ('DAX Expressions', 'Measure', 'Warning', 'Filter measure values by columns, not tables', - lambda df: df['Measure Expression'].str.contains("(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]" or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[", regex=True), - "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)", - 'https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Filter column values with proper syntax', - lambda df: df['Measure Expression'].str.contains("(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]" or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]", regex=True), - "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"", - 'https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Use the DIVIDE function for division', - lambda df: df['Measure Expression'].str.contains("\\]\\s*\\/(?!\\/)(?!\\*)\" or \"\\)\\s*\\/(?!\\/)(?!\\*)",regex=True), - 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.', - 'https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator', - ), - ('DAX Expressions', 'Measure', 'Error', 'Column references should be fully qualified', - lambda df: df['Has Unqualified Column Reference'], - 'Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Measure', 'Error', 'Measure references should be unqualified', - lambda df: df['Has Fully Qualified Measure Reference'], - 'Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. 
Use only the measure name in square brackets.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Relationship', 'Warning', 'Inactive relationships that are never activated', - lambda df: df['Inactive without USERELATIONSHIP'], - 'Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.', - 'https://dax.guide/userelationship', - ), - ('Maintenance', 'Column', 'Warning', 'Remove unnecessary columns', - lambda df: (df['Hidden'] | df['Parent Is Hidden']) & ~ df['Used in Relationship'] & ~ df['Used in Sort By'] & ~ df['Used in Hierarchy'] & (df['Referenced By'] == 0) & ~ (df['Used in RLS']), # usedInOLS - 'Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.', - ), - ('Maintenance', 'Measure', 'Warning', 'Remove unnecessary measures', - lambda df: df['Measure Hidden'] & (df['Referenced By'] == 0), - 'Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.', - ), - #('Maintenance', 'Role', 'Warning', 'Remove roles with no members', - # lambda df: df['Member Count'] == 0, - #), - ('Maintenance', 'Table', 'Warning', 'Ensure tables have relationships', - lambda df: (df['Used in Relationship'] == False) & (df['Type'] != 'Calculation Group'), - 'This rule highlights tables which are not connected to any other table in the model with a relationship.', - ), - ('Maintenance', 'Table', 'Warning', 'Calculation groups with no calculation items', - lambda df: (df['Type'] == 'Calculation Group') & (df['Has Calculation Items']), - ), - ('Maintenance', 'Column', 'Info', 'Visible objects with no description', - lambda df: (df['Hidden'] == False) & (df['Description'].str.len() == 0), - 'Calculation groups have no function unless they have calculation items.', - ), - ('Formatting', 'Column', 'Warning', "Provide format string for 'Date' columns", - lambda df: (df['Column Name'].str.contains(r'date', case=False)) & (df['Data Type'] == 'DateTime') & (df['Format String'] != 'mm/dd/yyyy'), - 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".', - ), - ('Formatting', 'Column', 'Warning', 'Do not summarize numeric columns', - lambda df: ((df['Data Type'] == 'Int64') | (df['Data Type'] == 'Decimal') | (df['Data Type'] == 'Double')) & (df['Summarize By'] != 'None') & ~ ((df['Hidden']) | (df['Parent Is Hidden']) ), - 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).', - ), - ('Formatting', 'Measure', 'Info', 'Provide format string for measures', - lambda df: ~ ((df['Measure Hidden']) | (df['Parent Is Hidden'])) & (df['Format String'].str.len() == 0), - 'Visible measures should have their format string property assigned.', - ), - ('Formatting', 'Column', 'Info', 'Add data category for columns', - lambda df: (df['Data Category'] == '') & ((((df['Column Name'].str.contains(r'country', case=False)) | (df['Column Name'].str.contains(r'city', case=False)) | (df['Column Name'].str.contains(r'continent', case=False))) & (df['Data Type'] == 'String')) | (((df['Column Name'].str.contains(r'latitude', case=False)) | (df['Column Name'].str.contains(r'longitude', 
case=False))) & (df['Data Type'] == 'String')) ), - 'Add Data Category property for appropriate columns.', - 'https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization', - ), - ('Formatting', 'Measure', 'Warning', 'Percentages should be formatted with thousands separators and 1 decimal', - lambda df: (df['Format String'].str.contains('%')) & (df['Format String'] != '#,0.0%;-#,0.0%;#,0.0%'), - ), - ('Formatting', 'Measure', 'Warning', 'Whole numbers should be formatted with thousands separators and no decimals', - lambda df: (~ df['Format String'].str.contains('$')) & ~ (df['Format String'].str.contains('%')) & ~ ((df['Format String'] == '#,0') | (df['Format String'] == '#,0.0')), - ), - ('Formatting', 'Column', 'Info', 'Hide foreign keys', - lambda df: (df['Foreign Key']) & (df['Hidden'] == False), - 'Foreign keys should always be hidden.', - ), - ('Formatting', 'Column', 'Info', 'Mark primary keys', - lambda df: (df['Primary Key']) & (df['Key'] == False), - "Set the 'Key' property to 'True' for primary key columns within the column properties.", - ), - ('Formatting', 'Column', 'Info', 'Month (as a string) must be sorted', - lambda df: (df['Column Name'].str.contains(r'month', case=False)) & ~ (df['Column Name'].str.contains(r'months', case=False)) & (df['Data Type'] == 'String') & (df['Sort By Column'] == ''), - 'This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).', - ), - ('Formatting', 'Relationship', 'Warning', 'Relationship columns should be of integer data type', - lambda df: (df['From Column Data Type'] != 'Int64') | (df['To Column Data Type'] != 'Int64'), - 'It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.', - ), - ('Formatting', 'Column', 'Warning', 'Provide format string for "Month" columns', - lambda df: (df['Column Name'].str.contains(r'month', case=False)) & (df['Data Type'] == 'DateTime') & (df['Format String'] != 'MMMM yyyy'), - 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".', - ), - ('Formatting', 'Column', 'Info', 'Format flag columns as Yes/No value strings', - lambda df: ( df['Column Name'].str.startswith("Is") & (df['Data Type'] == "Int64") & ~ (df['Hidden'] | df['Parent Is Hidden']) ) | ( df['Column Name'].str.endswith(" Flag") & (df['Data Type'] != "String") & ~ (df['Hidden'] | df['Parent Is Hidden']) ), - 'Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.', - ), - #('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space', - # lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '), - # 'Objects should not start or end with a space. 
This usually happens by accident and is difficult to find.', - #), - ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Info', 'First letter of objects must be capitalized', - lambda df: df['Name'].str[0].str.upper() != df['Name'].str[0], - 'The first letter of object names should be capitalized to maintain professional quality.', - ), - ('Naming Conventions', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Warning', 'Object names must not contain special characters', - lambda df: df['Name'].str.contains(r'[\t\r\n]'), - 'Object names should not include tabs, line breaks, etc.', - )#, - #('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names', - # lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(), - #)#, - - ], columns=['Category', 'Scope', 'Severity', 'Rule Name', 'Expression', 'Description', 'URL']) - - df_rules['Severity'] = df_rules['Severity'].replace('Warning', '⚠️').replace('Error', '\u274C').replace('Info', 'ℹ️') - - pd.set_option('display.max_colwidth', 1000) - - return df_rules + df_rules = pd.DataFrame( + [ + ( + "Performance", + "Column", + "Warning", + "Do not use floating point data types", + lambda df: df["Data Type"] == "Double", + 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).', + ), + ( + "Performance", + "Column", + "Warning", + "Avoid using calculated columns", + lambda df: df["Type"] == "Calculated", + "Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "Performance", + "Relationship", + "Warning", + "Check if bi-directional and many-to-many relationships are valid", + lambda df: (df["Multiplicity"] == "m:m") + | (df["Cross Filtering Behavior"] == "BothDirections"), + "Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.", + "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax", + ), + ( + "Performance", + "Row Level Security", + "Info", + "Check if dynamic row level security (RLS) is necessary", + lambda df: df["Is Dynamic"], + "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.", + "https://docs.microsoft.com/power-bi/admin/service-admin-rls", + ), + ( + "Performance", + "Table", + "Warning", + "Avoid using many-to-many relationships on tables used for dynamic row level security", + lambda df: (df["Used in M2M Relationship"] == True) + & (df["Used in Dynamic RLS"] == True), + "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. 
Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.", + "https://www.elegantbi.com/post/dynamicrlspatterns", + ), + ( + "Performance", + "Relationship", + "Warning", + "Many-to-many relationships should be single-direction", + lambda df: (df["Multiplicity"] == "m:m") + & (df["Cross Filtering Behavior"] == "BothDirections"), + ), + ( + "Performance", + "Column", + "Warning", + "Set IsAvailableInMdx to false on non-attribute columns", + lambda df: (df["Is Direct Lake"] == False) + & (df["Is Available in MDX"] == True) + & ((df["Hidden"] == True) | (df["Parent Is Hidden"] == True)) + & (df["Used in Sort By"] == False) + & (df["Used in Hierarchy"] == False) + & (df["Sort By Column"] == None), + "To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. The IsAvailableInMdx property is not relevant for Direct Lake models.", + "https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular", + ), + # ('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table", + # lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']), + # "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.", + # "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions", + # ), + ( + "Performance", + "Table", + "Warning", + "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables", + lambda df: (df["Import Partitions"] == 1) + & (df["Model Has DQ"]) + & (df["Used in Relationship x:1"]), + "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting", + ), + ( + "Performance", + "Partition", + "Warning", + "Minimize Power Query transformations", + lambda df: (df["Source Type"] == "M") + & ( + ('Table.Combine("' in df["Query"]) + | ('Table.Join("' in df["Query"]) + | ('Table.NestedJoin("' in df["Query"]) + | ('Table.AddColumn("' in df["Query"]) + | ('Table.Group("' in df["Query"]) + | ('Table.Sort("' in df["Query"]) + | ('Table.Sort("' in df["Query"]) + | ('Table.Pivot("' in df["Query"]) + | ('Table.Unpivot("' in df["Query"]) + | ('Table.UnpivotOtherColumns("' in df["Query"]) + | ('Table.Distinct("' in df["Query"]) + | ('[Query=(""SELECT' in df["Query"]) + | ("Value.NativeQuery" in df["Query"]) + | ("OleDb.Query" in df["Query"]) + | ("Odbc.Query" in df["Query"]) + ), + "Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. 
Please reference the article below for more information on query folding.", + "https://docs.microsoft.com/power-query/power-query-folding", + ), + ( + "Performance", + "Table", + "Warning", + "Consider a star-schema instead of a snowflake architecture", + lambda df: (df["Type"] != "Calculation Group") + & df["Used in Relationship Both Sides"], + "Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.", + "https://docs.microsoft.com/power-bi/guidance/star-schema", + ), + ( + "Performance", + "Table", + "Warning", + "Reduce usage of calculated tables", + lambda df: df["Type"] == "Calculated Table", + "Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.", + ), + ( + "Performance", + "Column", + "Warning", + "Reduce usage of calculated columns that use the RELATED function", + lambda df: (df["Type"] == "Calculated") + & (df["Source"].str.contains(r"related\s*\(", case=False)), + "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. One scenario where they may be easier to avoid is if they use the RELATED function.", + "https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables", + ), + ( + "Performance", + "Model", + "Warning", + "Avoid excessive bi-directional or many-to-many relationships", + lambda df: ( + df["M2M or BiDi Relationship Count"] / df["Relationship Count"] + ) + > 0.3, + "Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.", + "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax", + ), + ( + "Performance", + "Column", + "Warning", + "Avoid bi-directional or many-to-many relationships against high-cardinality columns", + lambda df: df["Used in M2M/BiDi Relationship"] + & df["Column Cardinality"] + > 100000, + "For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns", + ), + ( + "Performance", + "Table", + "Warning", + "Remove auto-date table", + lambda df: (df["Type"] == "Calculated Table") + & ( + (df["Name"].str.startswith("DateTableTemplate_")) + | (df["Name"].str.startswith("LocalDateTable_")) + ), + "Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.", + "https://www.youtube.com/watch?v=xu3uDEHtCrg", + ), + ( + "Performance", + "Table", + "Warning", + "Date/calendar tables should be marked as a date table", + lambda df: ( + (df["Name"].str.contains(r"date", case=False)) + | (df["Name"].str.contains(r"calendar", case=False)) + ) + & (df["Data Category"] != "Time"), + "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.", + "https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables", + ), + ( + "Performance", + "Table", + "Warning", + "Large tables should be partitioned", + lambda df: (df["Is Direct Lake"] == False) + & (df["Partition Count"] == 1) + & (df["Row Count"] > 25000000), + "Large tables should be partitioned in order to optimize processing. 
This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.", + ), + ( + "Performance", + "Row Level Security", + "Warning", + "Limit row level security (RLS) logic", + lambda df: df["Filter Expression"].str.contains( + "|".join(["right", "left", "filter", "upper", "lower", "find"]), + case=False, + ), + "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).", + ), + ( + "Performance", + "Model", + "Warning", + "Model should have a date table", + lambda df: df["Has Date Table"], + "Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.", + ), + ( + "Performance", + "Measure", + "Warning", + "Measures using time intelligence and model is using Direct Query", + lambda df: df["DQ Date Function Used"], + "At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.", + ), + ( + "Error Prevention", + "Calculation Item", + "Error", + "Calculation items must have an expression", + lambda df: df["Expression"].str.len() == 0, + "Calculation items must have an expression. Without an expression, they will not show any values.", + ), + ( + "Error Prevention", + ["Table", "Column", "Measure", "Hierarchy", "Partition"], + "Error", + "Avoid invalid characters in names", + lambda df: df["Name"].apply( + lambda x: any( + unicodedata.category(char) == "Cc" and not char.isspace() + for char in x + ) + ), + "This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.", + ), + ( + "Error Prevention", + ["Table", "Column", "Measure", "Hierarchy"], + "Error", + "Avoid invalid characters in descriptions", + lambda df: df["Description"].apply( + lambda x: any( + unicodedata.category(char) == "Cc" and not char.isspace() + for char in x + ) + ), + "This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.", + ), + ( + "Error Prevention", + "Relationship", + "Warning", + "Relationship columns should be of the same data type", + lambda df: df["From Column Data Type"] != df["To Column Data Type"], + "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.", + ), + ( + "Error Prevention", + "Column", + "Error", + "Data columns must have a source column", + lambda df: (df["Type"] == "Data") & (df["Source"].str.len() == 0), + "Data columns must have a source column. 
A data column without a source column will cause an error when processing the model.", + ), + ( + "Error Prevention", + "Column", + "Warning", + "Set IsAvailableInMdx to true on necessary columns", + lambda df: (df["Is Direct Lake"] == False) + & (df["Is Available in MDX"] == False) + & ( + (df["Used in Sort By"] == True) + | (df["Used in Hierarchy"] == True) + | (df["Sort By Column"] != None) + ), + "In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.", + ), + ( + "Error Prevention", + "Table", + "Error", + "Avoid the USERELATIONSHIP function and RLS against the same table", + lambda df: (df["USERELATIONSHIP Used"] == True) + & (df["Used in RLS"] == True), + "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.", + "https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Avoid using the IFERROR function", + lambda df: df["Measure Expression"].str.contains( + r"iferror\s*\(", case=False + ), + "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Use the TREATAS function instead of INTERSECT for virtual relationships", + lambda df: df["Measure Expression"].str.contains( + r"intersect\s*\(", case=False + ), + "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virtual relationships.", + "https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "The EVALUATEANDLOG function should not be used in production models", + lambda df: df["Measure Expression"].str.contains( + r"evaluateandlog\s*\(", case=False + ), + "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.", + "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Measures should not be direct references of other measures", + lambda df: df["Measure Expression"] + .str.strip() + .isin(df["Measure Object"]), + "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. 
Such duplicative measures should be removed.", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "No two measures should have the same definition", + lambda df: df["Measure Expression"] + .apply(lambda x: re.sub(r"\s+", "", x)) + .duplicated(keep=False), + "Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Avoid addition or subtraction of constant values to results of divisions", + lambda df: df["Measure Expression"].str.contains( + "(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1" + or "\\/\\s*.*(?=[-+]\\s*1)", + regex=True, + ), + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Avoid using '1-(x/y)' syntax", + lambda df: df["Measure Expression"].str.contains( + "[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/" + or "[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(", + regex=True, + ), + "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Filter measure values by columns, not tables", + lambda df: df["Measure Expression"].str.contains( + "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]" + or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[", + regex=True, + ), + "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)", + "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Filter column values with proper syntax", + lambda df: df["Measure Expression"].str.contains( + "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]" + or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]", + regex=True, + ), + "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. 
As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"", + "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Use the DIVIDE function for division", + lambda df: df["Measure Expression"].str.contains( + '\\]\\s*\\/(?!\\/)(?!\\*)" or "\\)\\s*\\/(?!\\/)(?!\\*)', regex=True + ), + 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.', + "https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator", + ), + ( + "DAX Expressions", + "Measure", + "Error", + "Column references should be fully qualified", + lambda df: df["Has Unqualified Column Reference"], + "Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "DAX Expressions", + "Measure", + "Error", + "Measure references should be unqualified", + lambda df: df["Has Fully Qualified Measure Reference"], + "Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. Use only the measure name in square brackets.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "DAX Expressions", + "Relationship", + "Warning", + "Inactive relationships that are never activated", + lambda df: df["Inactive without USERELATIONSHIP"], + "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. 
It should be determined whether the relationship is not necessary or to activate the relationship via this method.", + "https://dax.guide/userelationship", + ), + ( + "Maintenance", + "Column", + "Warning", + "Remove unnecessary columns", + lambda df: (df["Hidden"] | df["Parent Is Hidden"]) + & ~df["Used in Relationship"] + & ~df["Used in Sort By"] + & ~df["Used in Hierarchy"] + & (df["Referenced By"] == 0) + & ~(df["Used in RLS"]), # usedInOLS + "Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.", + ), + ( + "Maintenance", + "Measure", + "Warning", + "Remove unnecessary measures", + lambda df: df["Measure Hidden"] & (df["Referenced By"] == 0), + "Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.", + ), + # ('Maintenance', 'Role', 'Warning', 'Remove roles with no members', + # lambda df: df['Member Count'] == 0, + # ), + ( + "Maintenance", + "Table", + "Warning", + "Ensure tables have relationships", + lambda df: (df["Used in Relationship"] == False) + & (df["Type"] != "Calculation Group"), + "This rule highlights tables which are not connected to any other table in the model with a relationship.", + ), + ( + "Maintenance", + "Table", + "Warning", + "Calculation groups with no calculation items", + lambda df: (df["Type"] == "Calculation Group") + & (df["Has Calculation Items"]), + ), + ( + "Maintenance", + "Column", + "Info", + "Visible objects with no description", + lambda df: (df["Hidden"] == False) & (df["Description"].str.len() == 0), + "Calculation groups have no function unless they have calculation items.", + ), + ( + "Formatting", + "Column", + "Warning", + "Provide format string for 'Date' columns", + lambda df: (df["Column Name"].str.contains(r"date", case=False)) + & (df["Data Type"] == "DateTime") + & (df["Format String"] != "mm/dd/yyyy"), + 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".', + ), + ( + "Formatting", + "Column", + "Warning", + "Do not summarize numeric columns", + lambda df: ( + (df["Data Type"] == "Int64") + | (df["Data Type"] == "Decimal") + | (df["Data Type"] == "Double") + ) + & (df["Summarize By"] != "None") + & ~((df["Hidden"]) | (df["Parent Is Hidden"])), + 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).', + ), + ( + "Formatting", + "Measure", + "Info", + "Provide format string for measures", + lambda df: ~((df["Measure Hidden"]) | (df["Parent Is Hidden"])) + & (df["Format String"].str.len() == 0), + "Visible measures should have their format string property assigned.", + ), + ( + "Formatting", + "Column", + "Info", + "Add data category for columns", + lambda df: (df["Data Category"] == "") + & ( + ( + ( + (df["Column Name"].str.contains(r"country", case=False)) + | (df["Column Name"].str.contains(r"city", case=False)) + | (df["Column Name"].str.contains(r"continent", case=False)) + ) + & (df["Data Type"] == "String") + ) + | ( + ( + (df["Column Name"].str.contains(r"latitude", case=False)) + | (df["Column Name"].str.contains(r"longitude", case=False)) + ) + & (df["Data Type"] == "String") + ) + ), + "Add Data Category property for appropriate columns.", + "https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization", + ), + ( + "Formatting", + "Measure", + "Warning", + "Percentages should be formatted with thousands separators and 1 
decimal", + lambda df: (df["Format String"].str.contains("%")) + & (df["Format String"] != "#,0.0%;-#,0.0%;#,0.0%"), + ), + ( + "Formatting", + "Measure", + "Warning", + "Whole numbers should be formatted with thousands separators and no decimals", + lambda df: (~df["Format String"].str.contains("$")) + & ~(df["Format String"].str.contains("%")) + & ~((df["Format String"] == "#,0") | (df["Format String"] == "#,0.0")), + ), + ( + "Formatting", + "Column", + "Info", + "Hide foreign keys", + lambda df: (df["Foreign Key"]) & (df["Hidden"] == False), + "Foreign keys should always be hidden.", + ), + ( + "Formatting", + "Column", + "Info", + "Mark primary keys", + lambda df: (df["Primary Key"]) & (df["Key"] == False), + "Set the 'Key' property to 'True' for primary key columns within the column properties.", + ), + ( + "Formatting", + "Column", + "Info", + "Month (as a string) must be sorted", + lambda df: (df["Column Name"].str.contains(r"month", case=False)) + & ~(df["Column Name"].str.contains(r"months", case=False)) + & (df["Data Type"] == "String") + & (df["Sort By Column"] == ""), + "This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).", + ), + ( + "Formatting", + "Relationship", + "Warning", + "Relationship columns should be of integer data type", + lambda df: (df["From Column Data Type"] != "Int64") + | (df["To Column Data Type"] != "Int64"), + "It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.", + ), + ( + "Formatting", + "Column", + "Warning", + 'Provide format string for "Month" columns', + lambda df: (df["Column Name"].str.contains(r"month", case=False)) + & (df["Data Type"] == "DateTime") + & (df["Format String"] != "MMMM yyyy"), + 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".', + ), + ( + "Formatting", + "Column", + "Info", + "Format flag columns as Yes/No value strings", + lambda df: ( + df["Column Name"].str.startswith("Is") + & (df["Data Type"] == "Int64") + & ~(df["Hidden"] | df["Parent Is Hidden"]) + ) + | ( + df["Column Name"].str.endswith(" Flag") + & (df["Data Type"] != "String") + & ~(df["Hidden"] | df["Parent Is Hidden"]) + ), + "Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.", + ), + # ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space', + # lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '), + # 'Objects should not start or end with a space. 
This usually happens by accident and is difficult to find.', + # ), + ( + "Formatting", + ["Table", "Column", "Measure", "Partition", "Hierarchy"], + "Info", + "First letter of objects must be capitalized", + lambda df: df["Name"].str[0].str.upper() != df["Name"].str[0], + "The first letter of object names should be capitalized to maintain professional quality.", + ), + ( + "Naming Conventions", + ["Table", "Column", "Measure", "Partition", "Hierarchy"], + "Warning", + "Object names must not contain special characters", + lambda df: df["Name"].str.contains(r"[\t\r\n]"), + "Object names should not include tabs, line breaks, etc.", + ), # , + # ('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names', + # lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(), + # )#, + ], + columns=[ + "Category", + "Scope", + "Severity", + "Rule Name", + "Expression", + "Description", + "URL", + ], + ) -@log -def run_model_bpa(dataset: str, rules_dataframe: Optional[pd.DataFrame] = None, workspace: Optional[str] = None, export: Optional[bool] = False, return_dataframe: Optional[bool] = False, **kwargs): + df_rules["Severity"] = ( + df_rules["Severity"] + .replace("Warning", "⚠️") + .replace("Error", "\u274C") + .replace("Info", "ℹ️") + ) + + pd.set_option("display.max_colwidth", 1000) + return df_rules + + +@log +def run_model_bpa( + dataset: str, + rules_dataframe: Optional[pd.DataFrame] = None, + workspace: Optional[str] = None, + export: Optional[bool] = False, + return_dataframe: Optional[bool] = False, + **kwargs, +): """ Displays an HTML visualization of the results of the Best Practice Analyzer scan for a semantic model. @@ -326,18 +726,23 @@ def run_model_bpa(dataset: str, rules_dataframe: Optional[pd.DataFrame] = None, If True, exports the resulting dataframe to a delta table in the lakehouse attached to the notebook. return_dataframe : bool, default=False If True, returns a pandas dataframe instead of the visualization. - + Returns ------- pandas.DataFrame A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules. """ - if 'extend' in kwargs: - print("The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward.") - del kwargs['extend'] + if "extend" in kwargs: + print( + "The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward." 
+ ) + del kwargs["extend"] - warnings.filterwarnings("ignore", message="This pattern is interpreted as a regular expression, and has match groups.") + warnings.filterwarnings( + "ignore", + message="This pattern is interpreted as a regular expression, and has match groups.", + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -346,194 +751,340 @@ def run_model_bpa(dataset: str, rules_dataframe: Optional[pd.DataFrame] = None, if rules_dataframe is None: rules_dataframe = model_bpa_rules() - dfT = fabric.list_tables(dataset = dataset, workspace = workspace, extended=True) + dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True) dfT = dfT.drop_duplicates() - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended=True, additional_xmla_properties=['Parent.DataCategory', 'Parent.IsHidden']) - dfC = dfC[~dfC['Column Name'].str.startswith('RowNumber-')] - - dfM = fabric.list_measures(dataset = dataset, workspace = workspace, additional_xmla_properties=['Parent.IsHidden']) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace, additional_xmla_properties=['FromCardinality', 'ToCardinality']) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, additional_xmla_properties=['DataCoverageDefinition.Expression']) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfRole = fabric.get_roles(dataset = dataset, workspace = workspace) - dfRM = fabric.get_roles(dataset = dataset, workspace = workspace, include_members=True) - dfRLS = fabric.get_row_level_security_permissions(dataset = dataset, workspace = workspace) - #dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName) - #dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName) - dfCI = fabric.list_calculation_items(dataset = dataset, workspace = workspace) - #dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName) - #dfPersp = fabric.list_perspectives(dataset = datasetName, workspace = workspaceName) - dfD = fabric.list_datasets(mode = 'rest', workspace = workspace) - dfD = dfD[dfD['Dataset Name'] == dataset] - #datasetOwner = dfD['Configured By'].iloc[0] + dfC = fabric.list_columns( + dataset=dataset, + workspace=workspace, + extended=True, + additional_xmla_properties=["Parent.DataCategory", "Parent.IsHidden"], + ) + dfC = dfC[~dfC["Column Name"].str.startswith("RowNumber-")] + + dfM = fabric.list_measures( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["Parent.IsHidden"], + ) + dfR = fabric.list_relationships( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["FromCardinality", "ToCardinality"], + ) + dfP = fabric.list_partitions( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["DataCoverageDefinition.Expression"], + ) + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfRole = fabric.get_roles(dataset=dataset, workspace=workspace) + dfRM = fabric.get_roles(dataset=dataset, workspace=workspace, include_members=True) + dfRLS = fabric.get_row_level_security_permissions( + dataset=dataset, workspace=workspace + ) + # dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName) + # dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName) + dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace) + # dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName) + # dfPersp = 
fabric.list_perspectives(dataset = datasetName, workspace = workspaceName) + dfD = fabric.list_datasets(mode="rest", workspace=workspace) + dfD = dfD[dfD["Dataset Name"] == dataset] + # datasetOwner = dfD['Configured By'].iloc[0] md = get_measure_dependencies(dataset, workspace) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - dfC['Is Direct Lake'] = isDirectLake - dfT['Is Direct Lake'] = isDirectLake + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) + dfC["Is Direct Lake"] = isDirectLake + dfT["Is Direct Lake"] = isDirectLake - cols = ['From Cardinality', 'To Cardinality'] + cols = ["From Cardinality", "To Cardinality"] for col in cols: if not col in dfR: dfR[col] = None - cols = ['Parent Is Hidden'] + cols = ["Parent Is Hidden"] for col in cols: if not col in dfM: dfM[col] = None - + # Data Coverage Definition rule - dfP_imp = dfP[dfP['Mode'] == 'Import'] - dfTP = dfP_imp.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Import Partitions'}, inplace=True) - dfP = pd.merge(dfP, dfTP[['Table Name', 'Import Partitions']], on = 'Table Name', how = 'left') - dfP['Import Partitions'].fillna(0, inplace=True) - dfC_DateKey = dfC[(dfC['Parent Data Category'] == 'Time') & (dfC['Data Type'] == 'DateTime') & (dfC['Key'])] + dfP_imp = dfP[dfP["Mode"] == "Import"] + dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True) + dfP = pd.merge( + dfP, dfTP[["Table Name", "Import Partitions"]], on="Table Name", how="left" + ) + dfP["Import Partitions"].fillna(0, inplace=True) + dfC_DateKey = dfC[ + (dfC["Parent Data Category"] == "Time") + & (dfC["Data Type"] == "DateTime") + & (dfC["Key"]) + ] hasDateTable = False if len(dfC_DateKey) > 0: hasDateTable = True - dfP['Has Date Table'] = hasDateTable + dfP["Has Date Table"] = hasDateTable # Set dims to dual mode - dfR_one = dfR[dfR['To Cardinality'] == 'One'] - dfTP = dfP_imp.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Import Partitions'}, inplace=True) - dfT = pd.merge(dfT, dfTP, left_on = 'Name', right_on = 'Table Name', how='left') - dfT.drop(columns=['Table Name'], inplace=True) - dfT['Import Partitions'].fillna(0, inplace=True) - hasDQ = any(r['Mode'] == 'DirectQuery' for i, r in dfP.iterrows()) - dfT['Model Has DQ'] = hasDQ - dfT['Used in Relationship x:1'] = dfT['Name'].isin(dfR_one['To Table']) + dfR_one = dfR[dfR["To Cardinality"] == "One"] + dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True) + dfT = pd.merge(dfT, dfTP, left_on="Name", right_on="Table Name", how="left") + dfT.drop(columns=["Table Name"], inplace=True) + dfT["Import Partitions"].fillna(0, inplace=True) + hasDQ = any(r["Mode"] == "DirectQuery" for i, r in dfP.iterrows()) + dfT["Model Has DQ"] = hasDQ + dfT["Used in Relationship x:1"] = dfT["Name"].isin(dfR_one["To Table"]) dfF = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [FUNCTION_NAME] FROM $SYSTEM.MDSCHEMA_FUNCTIONS WHERE [INTERFACE_NAME] = 'DATETIME' - """) + """, + ) - dfC['Name'] = dfC['Column Name'] - dfH['Name'] = dfH['Hierarchy Name'] - dfM['Name'] = dfM['Measure Name'] - dfP['Name'] = dfP['Partition Name'] - dfRole['Name'] = dfRole['Role'] - dfD['Name'] = 
dfD['Dataset Name'] - dfH['Description'] = dfH['Hierarchy Description'] - dfM['Description'] = dfM['Measure Description'] - dfH['Hierarchy Object'] = format_dax_object_name(dfH['Table Name'], dfH['Hierarchy Name']) + dfC["Name"] = dfC["Column Name"] + dfH["Name"] = dfH["Hierarchy Name"] + dfM["Name"] = dfM["Measure Name"] + dfP["Name"] = dfP["Partition Name"] + dfRole["Name"] = dfRole["Role"] + dfD["Name"] = dfD["Dataset Name"] + dfH["Description"] = dfH["Hierarchy Description"] + dfM["Description"] = dfM["Measure Description"] + dfH["Hierarchy Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Hierarchy Name"] + ) - dfCI['Calculation Object'] = format_dax_object_name(dfCI['Calculation Group Name'], dfCI['Calculation Item Name']) + dfCI["Calculation Object"] = format_dax_object_name( + dfCI["Calculation Group Name"], dfCI["Calculation Item Name"] + ) - dfRole['Member Count'] = dfRM['Role'].isin(dfRole['Role']).sum() - dfRLS['Is Dynamic'] = dfRLS['Filter Expression'].str.contains(r'userprincipalname\s*\(', case=False) | dfRLS['Filter Expression'].str.contains(r'username\s*\(', case=False) + dfRole["Member Count"] = dfRM["Role"].isin(dfRole["Role"]).sum() + dfRLS["Is Dynamic"] = dfRLS["Filter Expression"].str.contains( + r"userprincipalname\s*\(", case=False + ) | dfRLS["Filter Expression"].str.contains(r"username\s*\(", case=False) # Partition Count - partition_count = dfP.groupby('Table Name').size().reset_index(name='Partition Count') - dfT = pd.merge(dfT, partition_count, left_on='Name', right_on='Table Name', how='left').drop('Table Name', axis=1) - dfT['Partition Count'] = dfT['Partition Count'].fillna(0).astype(int) - - dfT = dfT.merge(dfP[['Table Name', 'Partition Name']], how='left', left_on='Name', right_on='Table Name') - dfT['First Partition Name'] = dfT.groupby('Name')['Partition Name'].transform('first') - dfT.drop('Table Name', axis=1, inplace=True) - - dfC['Sort By Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Sort By Column']) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfM['Measure Object'] = "[" + dfM['Measure Name'] + "]" - dfM['Measure Fully Qualified'] = format_dax_object_name(dfM['Table Name'], dfM['Measure Name']) - dfM['Measure Fully Qualified No Spaces'] = dfM['Table Name'] + '[' + dfM['Measure Name'] + ']' - #dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) - dfC['Column Unqualified'] = "[" + dfC['Column Name'] + "]" - dfC['Column Object No Spaces'] = dfC.apply(lambda row: row['Table Name'] + '[' + row['Column Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) - dfC['Used in Sort By'] = dfC['Column Object'].isin(dfC['Sort By Column Object']) - dfH['Column Object'] = format_dax_object_name(dfH['Table Name'], dfH['Column Name']) - dfC['Used in Hierarchy'] = dfC['Column Object'].isin(dfH['Column Object']) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfT['Used in Relationship'] = dfT['Name'].isin(dfR['From Table']) | dfT['Name'].isin(dfR['To Table']) - dfT['Used in Relationship Both Sides'] = dfT['Name'].isin(dfR['From Table']) & dfT['Name'].isin(dfR['To Table']) - dfC['Used in Relationship'] = dfC['Column Object'].isin(dfR['From Object']) | dfC['Column Object'].isin(dfR['To Object']) - - dfR_filt = dfR[(dfR['Cross Filtering Behavior'] == 
'BothDirections') | (dfR['Multiplicity'] == 'm:m')] - dfC['Used in M2M/BiDi Relationship'] = dfC['Column Object'].isin(dfR_filt['From Object']) | dfC['Column Object'].isin(dfR_filt['To Object']) - dfC['Foreign Key'] = dfC['Column Object'].isin(dfR[dfR['From Cardinality'] == 'Many']['From Object']) - dfC['Primary Key'] = dfC['Column Object'].isin(dfR[dfR['To Cardinality'] == 'One']['To Object']) - dfT['Used in M2M Relationship'] = dfT['Name'].isin(dfR[dfR['Multiplicity'] == 'm:m'][['From Table']]) | dfT['Name'].isin(dfR[dfR['Multiplicity'] == 'm:m'][['To Table']]) - dfT['Used in Dynamic RLS'] = dfT['Name'].isin(dfRLS[dfRLS['Is Dynamic']]['Table']) - dfT['Used in RLS'] = dfT['Name'].isin(dfRLS.loc[dfRLS['Filter Expression'].str.len() > 0, 'Table']) - dfC['Primary Key'] = dfC['Column Object'].isin(dfR.loc[dfR['To Cardinality'] == 'One', 'To Object']) - dfD['Has Date Table'] = any((r['Parent Data Category'] == 'Time') & (r['Data Type'] == 'DateTime') & (r['Key'] == True) for i, r in dfC.iterrows()) - #dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name']) - dfD['Relationship Count'] = len(dfR) - dfD['M2M or BiDi Relationship Count'] = len(dfR[(dfR['Multiplicity'] == 'm:m') | (dfR['Cross Filtering Behavior'] == 'BothDirections')]) - dfD['Calculation Group Count'] = len(dfT[dfT['Type'] == 'Calculation Group']) - dfT['Has Calculation Items'] = np.where((dfT['Type'] == 'Calculation Group') & dfT['Name'].isin(dfCI['Calculation Group Name']), True, False) - dfP['Partition Object'] = format_dax_object_name(dfP['Table Name'], dfP['Partition Name']) - dfRLS['RLS Object'] = format_dax_object_name(dfRLS['Role'], dfRLS['Table']) - - function_pattern = '|'.join(dfF['FUNCTION_NAME'].map(re.escape)) - - dfM['DQ Date Function Used'] = any(dfP['Mode'] == 'DirectQuery') & dfM['Measure Expression'].str.contains(f'({function_pattern})\\s*\\(', case=False, regex=True) - - md['Reference'] = "'" + md['Referenced Table'] + "'[" + md['Referenced Object'] + ']' - - dfC['Referenced By'] = md[(md['Referenced Object Type'] == 'Column') & (md['Reference'].isin(dfC['Column Object']))].groupby('Reference').size().reset_index(name='Count')['Count'] - dfC['Referenced By'].fillna(0, inplace=True) - dfC['Referenced By'] = dfC['Referenced By'].fillna(0).astype(int) - - dfM['Referenced By'] = md[(md['Referenced Object Type'] == 'Measure') & (md['Referenced Object'].isin(dfM['Measure Name']))].groupby('Referenced Object').size().reset_index(name='Count')['Count'] - dfM['Referenced By'].fillna(0, inplace=True) - dfM['Referenced By'] = dfM['Referenced By'].fillna(0).astype(int) + partition_count = ( + dfP.groupby("Table Name").size().reset_index(name="Partition Count") + ) + dfT = pd.merge( + dfT, partition_count, left_on="Name", right_on="Table Name", how="left" + ).drop("Table Name", axis=1) + dfT["Partition Count"] = dfT["Partition Count"].fillna(0).astype(int) + + dfT = dfT.merge( + dfP[["Table Name", "Partition Name"]], + how="left", + left_on="Name", + right_on="Table Name", + ) + dfT["First Partition Name"] = dfT.groupby("Name")["Partition Name"].transform( + "first" + ) + dfT.drop("Table Name", axis=1, inplace=True) + + dfC["Sort By Column Object"] = format_dax_object_name( + dfC["Table Name"], dfC["Sort By Column"] + ) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfM["Measure Object"] = "[" + dfM["Measure Name"] + "]" + dfM["Measure Fully Qualified"] = format_dax_object_name( + dfM["Table Name"], dfM["Measure Name"] + ) + dfM["Measure Fully 
Qualified No Spaces"] = ( + dfM["Table Name"] + "[" + dfM["Measure Name"] + "]" + ) + # dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) + dfC["Column Unqualified"] = "[" + dfC["Column Name"] + "]" + dfC["Column Object No Spaces"] = dfC.apply( + lambda row: ( + row["Table Name"] + "[" + row["Column Name"] + "]" + if " " not in row["Table Name"] + else "" + ), + axis=1, + ) + dfC["Used in Sort By"] = dfC["Column Object"].isin(dfC["Sort By Column Object"]) + dfH["Column Object"] = format_dax_object_name(dfH["Table Name"], dfH["Column Name"]) + dfC["Used in Hierarchy"] = dfC["Column Object"].isin(dfH["Column Object"]) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + dfT["Used in Relationship"] = dfT["Name"].isin(dfR["From Table"]) | dfT[ + "Name" + ].isin(dfR["To Table"]) + dfT["Used in Relationship Both Sides"] = dfT["Name"].isin(dfR["From Table"]) & dfT[ + "Name" + ].isin(dfR["To Table"]) + dfC["Used in Relationship"] = dfC["Column Object"].isin(dfR["From Object"]) | dfC[ + "Column Object" + ].isin(dfR["To Object"]) + + dfR_filt = dfR[ + (dfR["Cross Filtering Behavior"] == "BothDirections") + | (dfR["Multiplicity"] == "m:m") + ] + dfC["Used in M2M/BiDi Relationship"] = dfC["Column Object"].isin( + dfR_filt["From Object"] + ) | dfC["Column Object"].isin(dfR_filt["To Object"]) + dfC["Foreign Key"] = dfC["Column Object"].isin( + dfR[dfR["From Cardinality"] == "Many"]["From Object"] + ) + dfC["Primary Key"] = dfC["Column Object"].isin( + dfR[dfR["To Cardinality"] == "One"]["To Object"] + ) + dfT["Used in M2M Relationship"] = dfT["Name"].isin( + dfR[dfR["Multiplicity"] == "m:m"][["From Table"]] + ) | dfT["Name"].isin(dfR[dfR["Multiplicity"] == "m:m"][["To Table"]]) + dfT["Used in Dynamic RLS"] = dfT["Name"].isin(dfRLS[dfRLS["Is Dynamic"]]["Table"]) + dfT["Used in RLS"] = dfT["Name"].isin( + dfRLS.loc[dfRLS["Filter Expression"].str.len() > 0, "Table"] + ) + dfC["Primary Key"] = dfC["Column Object"].isin( + dfR.loc[dfR["To Cardinality"] == "One", "To Object"] + ) + dfD["Has Date Table"] = any( + (r["Parent Data Category"] == "Time") + & (r["Data Type"] == "DateTime") + & (r["Key"] == True) + for i, r in dfC.iterrows() + ) + # dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name']) + dfD["Relationship Count"] = len(dfR) + dfD["M2M or BiDi Relationship Count"] = len( + dfR[ + (dfR["Multiplicity"] == "m:m") + | (dfR["Cross Filtering Behavior"] == "BothDirections") + ] + ) + dfD["Calculation Group Count"] = len(dfT[dfT["Type"] == "Calculation Group"]) + dfT["Has Calculation Items"] = np.where( + (dfT["Type"] == "Calculation Group") + & dfT["Name"].isin(dfCI["Calculation Group Name"]), + True, + False, + ) + dfP["Partition Object"] = format_dax_object_name( + dfP["Table Name"], dfP["Partition Name"] + ) + dfRLS["RLS Object"] = format_dax_object_name(dfRLS["Role"], dfRLS["Table"]) + + function_pattern = "|".join(dfF["FUNCTION_NAME"].map(re.escape)) + + dfM["DQ Date Function Used"] = any(dfP["Mode"] == "DirectQuery") & dfM[ + "Measure Expression" + ].str.contains(f"({function_pattern})\\s*\\(", case=False, regex=True) + + md["Reference"] = ( + "'" + md["Referenced Table"] + "'[" + md["Referenced Object"] + "]" + ) + + dfC["Referenced By"] = ( + md[ + (md["Referenced Object Type"] == "Column") + & (md["Reference"].isin(dfC["Column 
Object"])) + ] + .groupby("Reference") + .size() + .reset_index(name="Count")["Count"] + ) + dfC["Referenced By"].fillna(0, inplace=True) + dfC["Referenced By"] = dfC["Referenced By"].fillna(0).astype(int) + + dfM["Referenced By"] = ( + md[ + (md["Referenced Object Type"] == "Measure") + & (md["Referenced Object"].isin(dfM["Measure Name"])) + ] + .groupby("Referenced Object") + .size() + .reset_index(name="Count")["Count"] + ) + dfM["Referenced By"].fillna(0, inplace=True) + dfM["Referenced By"] = dfM["Referenced By"].fillna(0).astype(int) pattern = "[^\( ][a-zA-Z0-9_()-]+\[[^\[]+\]|'[^']+'\[[^\[]+\]|\[[^\[]+\]" - dfM['Has Fully Qualified Measure Reference'] = False - dfM['Has Unqualified Column Reference'] = False + dfM["Has Fully Qualified Measure Reference"] = False + dfM["Has Unqualified Column Reference"] = False for i, r in dfM.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - expr = r['Measure Expression'] + tName = r["Table Name"] + mName = r["Measure Name"] + expr = r["Measure Expression"] matches = re.findall(pattern, expr) for m in matches: - if m[0] == '[': - if (m in dfC['Column Unqualified'].values) and (dfC[dfC['Table Name'] == tName]['Column Unqualified'] == m).any(): - dfM.at[i, 'Has Unqualified Column Reference'] = True + if m[0] == "[": + if (m in dfC["Column Unqualified"].values) and ( + dfC[dfC["Table Name"] == tName]["Column Unqualified"] == m + ).any(): + dfM.at[i, "Has Unqualified Column Reference"] = True else: - if (m in dfM['Measure Fully Qualified'].values) | (m in dfM['Measure Fully Qualified No Spaces'].values): - dfM.at[i, 'Has Fully Qualified Measure Reference'] = True - - dfR['Inactive without USERELATIONSHIP'] = False - for i,r in dfR[dfR['Active'] == False].iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - - dfM_filt = dfM[dfM['Measure Expression'].str.contains("(?i)USERELATIONSHIP\s*\(\s*\'*" + fromTable + "\'*\[" + fromColumn + "\]\s*,\s*\'*" + toTable + "\'*\[" + toColumn + "\]" , regex=True)] + if (m in dfM["Measure Fully Qualified"].values) | ( + m in dfM["Measure Fully Qualified No Spaces"].values + ): + dfM.at[i, "Has Fully Qualified Measure Reference"] = True + + dfR["Inactive without USERELATIONSHIP"] = False + for i, r in dfR[dfR["Active"] == False].iterrows(): + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + + dfM_filt = dfM[ + dfM["Measure Expression"].str.contains( + "(?i)USERELATIONSHIP\s*\(\s*'*" + + fromTable + + "'*\[" + + fromColumn + + "\]\s*,\s*'*" + + toTable + + "'*\[" + + toColumn + + "\]", + regex=True, + ) + ] if len(dfM_filt) == 0: - dfR.at[i, 'Inactive without USERELATIONSHIP'] = True - - dfC['Used in RLS'] = ( - dfC['Column Object No Spaces'].isin(dfRLS['Filter Expression']) | - dfC['Column Object'].isin(dfRLS['Filter Expression']) | - dfC.apply(lambda row: any(row['Column Name'] in expr for expr in dfRLS.loc[dfRLS['Table'] == row['Table Name'], 'Filter Expression']), axis=1) + dfR.at[i, "Inactive without USERELATIONSHIP"] = True + + dfC["Used in RLS"] = ( + dfC["Column Object No Spaces"].isin(dfRLS["Filter Expression"]) + | dfC["Column Object"].isin(dfRLS["Filter Expression"]) + | dfC.apply( + lambda row: any( + row["Column Name"] in expr + for expr in dfRLS.loc[ + dfRLS["Table"] == row["Table Name"], "Filter Expression" + ] + ), + axis=1, + ) ) # Merge dfR and dfC based on 'From Object' and 'Column Object' - merged_from = pd.merge(dfR, dfC, left_on='From 
Object', right_on='Column Object', how='left') - merged_to = pd.merge(dfR, dfC, left_on='To Object', right_on='Column Object', how='left') + merged_from = pd.merge( + dfR, dfC, left_on="From Object", right_on="Column Object", how="left" + ) + merged_to = pd.merge( + dfR, dfC, left_on="To Object", right_on="Column Object", how="left" + ) - dfR['From Column Data Type'] = merged_from['Data Type'] - dfR['To Column Data Type'] = merged_to['Data Type'] + dfR["From Column Data Type"] = merged_from["Data Type"] + dfR["To Column Data Type"] = merged_to["Data Type"] # Check if USERELATIONSHIP objects are used in a given column, table - userelationship_pattern = re.compile(r"USERELATIONSHIP\s*\(\s*(.*?)\s*,\s*(.*?)\s*\)", re.DOTALL | re.IGNORECASE) + userelationship_pattern = re.compile( + r"USERELATIONSHIP\s*\(\s*(.*?)\s*,\s*(.*?)\s*\)", re.DOTALL | re.IGNORECASE + ) # Function to extract objects within USERELATIONSHIP function def extract_objects(measure_expression): @@ -543,28 +1094,47 @@ def extract_objects(measure_expression): else: return [] - dfM['USERELATIONSHIP Objects'] = dfM['Measure Expression'].apply(extract_objects) - flat_object_list = [item for sublist in dfM['USERELATIONSHIP Objects'] for item in sublist] - dfC['USERELATIONSHIP Used'] = dfC['Column Object'].isin(flat_object_list) | dfC['Column Object No Spaces'].isin(flat_object_list) - dfT['USERELATIONSHIP Used'] = dfT['Name'].isin(dfC[dfC['USERELATIONSHIP Used']]['Table Name']) - dfR['Relationship Name'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) + ' -> ' + format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfH = dfH[['Name', 'Description', 'Table Name', 'Hierarchy Name', 'Hierarchy Description', 'Hierarchy Object']].drop_duplicates() + dfM["USERELATIONSHIP Objects"] = dfM["Measure Expression"].apply(extract_objects) + flat_object_list = [ + item for sublist in dfM["USERELATIONSHIP Objects"] for item in sublist + ] + dfC["USERELATIONSHIP Used"] = dfC["Column Object"].isin(flat_object_list) | dfC[ + "Column Object No Spaces" + ].isin(flat_object_list) + dfT["USERELATIONSHIP Used"] = dfT["Name"].isin( + dfC[dfC["USERELATIONSHIP Used"]]["Table Name"] + ) + dfR["Relationship Name"] = ( + format_dax_object_name(dfR["From Table"], dfR["From Column"]) + + " -> " + + format_dax_object_name(dfR["To Table"], dfR["To Column"]) + ) + dfH = dfH[ + [ + "Name", + "Description", + "Table Name", + "Hierarchy Name", + "Hierarchy Description", + "Hierarchy Object", + ] + ].drop_duplicates() scope_to_dataframe = { - 'Table': (dfT, ['Name']), - 'Partition': (dfP, ['Partition Object']), - 'Column': (dfC, ['Column Object']), - 'Hierarchy': (dfH, ['Hierarchy Object']), - 'Measure': (dfM, ['Measure Name']), - 'Calculation Item': (dfCI, ['Calculation Object']), - 'Relationship': (dfR, ['Relationship Name']), - 'Row Level Security': (dfRLS, ['RLS Object']), - 'Role': (dfRole, ['Role']), - 'Model': (dfD, ['Dataset Name']) + "Table": (dfT, ["Name"]), + "Partition": (dfP, ["Partition Object"]), + "Column": (dfC, ["Column Object"]), + "Hierarchy": (dfH, ["Hierarchy Object"]), + "Measure": (dfM, ["Measure Name"]), + "Calculation Item": (dfCI, ["Calculation Object"]), + "Relationship": (dfR, ["Relationship Name"]), + "Row Level Security": (dfRLS, ["RLS Object"]), + "Role": (dfRole, ["Role"]), + "Model": (dfD, ["Dataset Name"]), } def execute_rule(row): - scopes = row['Scope'] + scopes = row["Scope"] # support both str and list as scope type if isinstance(scopes, str): @@ -577,10 +1147,10 @@ def execute_rule(row): # common fields for 
each scope (df, violation_cols_or_func) = scope_to_dataframe[scope] - if scope in ['Hierarchy', 'Measure'] and len(df) == 0: + if scope in ["Hierarchy", "Measure"] and len(df) == 0: continue # execute rule and subset df - df_violations = df[row['Expression'](df)] + df_violations = df[row["Expression"](df)] # subset the right output columns (e.g. Table Name & Column Name) if isinstance(violation_cols_or_func, list): @@ -589,46 +1159,49 @@ def execute_rule(row): violation_func = violation_cols_or_func # build output data frame - df_output = violation_func(df_violations).copy() - - df_output.columns = ['Object Name'] - df_output['Rule Name'] = row['Rule Name'] - df_output['Category'] = row['Category'] - - df_output['Object Type'] = scope - df_output['Severity'] = row['Severity'] - df_output['Description'] = row['Description'] - df_output['URL'] = row['URL'] + df_output = violation_func(df_violations).copy() + + df_output.columns = ["Object Name"] + df_output["Rule Name"] = row["Rule Name"] + df_output["Category"] = row["Category"] + + df_output["Object Type"] = scope + df_output["Severity"] = row["Severity"] + df_output["Description"] = row["Description"] + df_output["URL"] = row["URL"] df_outputs.append(df_output) - + return df_outputs # flatten list of lists flatten_dfs = [ - df - for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() - for df in dfs] + df for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() for df in dfs + ] finalDF = pd.concat(flatten_dfs, ignore_index=True) if export: lakeAttach = lakehouse_attached() if lakeAttach == False: - print(f"In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") + print( + f"In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
+ ) return dfExport = finalDF.copy() delta_table_name = "modelbparesults" lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) - lakeT = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeT_filt = lakeT[lakeT['Table Name'] == delta_table_name] + lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name] - dfExport['Severity'].replace('⚠️', 'Warning', inplace=True) - dfExport['Severity'].replace('\u274C', 'Error', inplace=True) - dfExport['Severity'].replace('ℹ️', 'Info', inplace=True) + dfExport["Severity"].replace("⚠️", "Warning", inplace=True) + dfExport["Severity"].replace("\u274C", "Error", inplace=True) + dfExport["Severity"].replace("ℹ️", "Info", inplace=True) spark = SparkSession.builder.getOrCreate() query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}" @@ -641,36 +1214,50 @@ def execute_rule(row): runId = maxRunId + 1 now = datetime.datetime.now() - dfExport['Workspace Name'] = workspace - dfExport['Dataset Name'] = dataset - dfExport['Timestamp'] = now - dfExport['RunId'] = runId + dfExport["Workspace Name"] = workspace + dfExport["Dataset Name"] = dataset + dfExport["Timestamp"] = now + dfExport["RunId"] = runId - dfExport['RunId'] = dfExport['RunId'].astype('int') + dfExport["RunId"] = dfExport["RunId"].astype("int") - colName = 'Workspace Name' + colName = "Workspace Name" dfExport.insert(0, colName, dfExport.pop(colName)) - colName = 'Dataset Name' + colName = "Dataset Name" dfExport.insert(1, colName, dfExport.pop(colName)) - dfExport.columns = dfExport.columns.str.replace(' ', '_') + dfExport.columns = dfExport.columns.str.replace(" ", "_") spark_df = spark.createDataFrame(dfExport) - spark_df.write.mode('append').format('delta').saveAsTable(delta_table_name) - print(f"\u2022 Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table.") + spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name) + print( + f"\u2022 Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table." + ) if return_dataframe: return finalDF - - pd.set_option('display.max_colwidth', 100) - finalDF = (finalDF[['Category', 'Rule Name', 'Object Type', 'Object Name' , 'Severity', 'Description', 'URL']] - .sort_values(['Category', 'Rule Name', 'Object Type', 'Object Name']) - .set_index(['Category', 'Rule Name'])) - + pd.set_option("display.max_colwidth", 100) + + finalDF = ( + finalDF[ + [ + "Category", + "Rule Name", + "Object Type", + "Object Name", + "Severity", + "Description", + "URL", + ] + ] + .sort_values(["Category", "Rule Name", "Object Type", "Object Name"]) + .set_index(["Category", "Rule Name"]) + ) + bpa2 = finalDF.reset_index() bpa_dict = { - cat: bpa2[bpa2['Category'] == cat].drop("Category", axis=1) - for cat in bpa2['Category'].drop_duplicates().values + cat: bpa2[bpa2["Category"] == cat].drop("Category", axis=1) + for cat in bpa2["Category"].drop_duplicates().values } styles = """ @@ -727,39 +1314,41 @@ def execute_rule(row): # HTML for tabs tab_html = '
' - content_html = '' + content_html = "" for i, (title, df) in enumerate(bpa_dict.items()): if df.shape[0] == 0: continue tab_id = f"tab{i}" - active_class = '' + active_class = "" if i == 0: - active_class = 'active' + active_class = "active" - summary = " + ".join([f'{idx} ({v})' for idx, v in df['Severity'].value_counts().items()]) + summary = " + ".join( + [f"{idx} ({v})" for idx, v in df["Severity"].value_counts().items()] + ) tab_html += f'' content_html += f'
' # Adding tooltip for Rule Name using Description column content_html += '' - content_html += '' + content_html += "" for _, row in df.iterrows(): - content_html += f'' + content_html += f"" if pd.notnull(row["URL"]): - content_html += f'' - elif pd.notnull(row['Description']): - content_html += f'' + content_html += f'' + elif pd.notnull(row["Description"]): + content_html += f'' else: - content_html += f'' + content_html += f'' content_html += f'' content_html += f'' content_html += f'' - content_html += f'' - content_html += '
Rule NameObject TypeObject NameSeverity
Rule NameObject TypeObject NameSeverity
{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Rule Name"]}{row["Object Type"]}{row["Object Name"]}{row["Severity"]}
' + content_html += f"" + content_html += "" - content_html += '
' - tab_html += '
' + content_html += "" + tab_html += "" # Display the tabs, tab contents, and run the script - return display(HTML(styles + tab_html + content_html + script)) \ No newline at end of file + return display(HTML(styles + tab_html + content_html + script)) diff --git a/sempy_labs/OneLakeIntegration.py b/sempy_labs/OneLakeIntegration.py index cd09c1fc..d73d104d 100644 --- a/sempy_labs/OneLakeIntegration.py +++ b/sempy_labs/OneLakeIntegration.py @@ -4,9 +4,14 @@ from typing import List, Optional, Union from sempy._utils._log import log -@log -def export_model_to_onelake(dataset: str, workspace: Optional[str] = None, destination_lakehouse: Optional[str] = None, destination_workspace: Optional[str] = None): +@log +def export_model_to_onelake( + dataset: str, + workspace: Optional[str] = None, + destination_lakehouse: Optional[str] = None, + destination_workspace: Optional[str] = None, +): """ Exports a semantic model's tables to delta tables in the lakehouse. Creates shortcuts to the tables if a lakehouse is specified. @@ -22,11 +27,11 @@ def export_model_to_onelake(dataset: str, workspace: Optional[str] = None, desti The name of the Fabric lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created. destination_workspace : str, default=None The name of the Fabric workspace in which the lakehouse resides. - + Returns ------- - + """ if workspace == None: @@ -41,11 +46,13 @@ def export_model_to_onelake(dataset: str, workspace: Optional[str] = None, desti else: destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) - dfD = fabric.list_datasets(workspace = workspace) - dfD_filt = dfD[dfD['Dataset Name'] == dataset] + dfD = fabric.list_datasets(workspace=workspace) + dfD_filt = dfD[dfD["Dataset Name"] == dataset] if len(dfD_filt) == 0: - print(f"The '{dataset}' semantic model does not exist in the '{workspace}' workspace.") + print( + f"The '{dataset}' semantic model does not exist in the '{workspace}' workspace." + ) return tmsl = f""" @@ -64,63 +71,94 @@ def export_model_to_onelake(dataset: str, workspace: Optional[str] = None, desti # Export model's tables as delta tables try: - fabric.execute_tmsl(script = tmsl, workspace = workspace) - print(f"The '{dataset}' semantic model's tables have been exported as delta tables to the '{workspace}' workspace.\n") + fabric.execute_tmsl(script=tmsl, workspace=workspace) + print( + f"The '{dataset}' semantic model's tables have been exported as delta tables to the '{workspace}' workspace.\n" + ) except: - print(f"ERROR: The '{dataset}' semantic model's tables have not been exported as delta tables to the '{workspace}' workspace.") - print(f"Make sure you enable OneLake integration for the '{dataset}' semantic model. Follow the instructions here: https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration") + print( + f"ERROR: The '{dataset}' semantic model's tables have not been exported as delta tables to the '{workspace}' workspace." + ) + print( + f"Make sure you enable OneLake integration for the '{dataset}' semantic model. Follow the instructions here: https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration" + ) return - + # Create shortcuts if destination lakehouse is specified if destination_lakehouse is not None: # Destination... 
- dfI_Dest = fabric.list_items(workspace = destination_workspace, type = 'Lakehouse') - dfI_filt = dfI_Dest[(dfI_Dest['Display Name'] == destination_lakehouse)] + dfI_Dest = fabric.list_items(workspace=destination_workspace, type="Lakehouse") + dfI_filt = dfI_Dest[(dfI_Dest["Display Name"] == destination_lakehouse)] if len(dfI_filt) == 0: - print(f"The '{destination_lakehouse}' lakehouse does not exist within the '{destination_workspace}' workspace.") + print( + f"The '{destination_lakehouse}' lakehouse does not exist within the '{destination_workspace}' workspace." + ) # Create lakehouse - destination_lakehouse_id = fabric.create_lakehouse(display_name = destination_lakehouse, workspace = destination_workspace) - print(f"The '{destination_lakehouse}' lakehouse has been created within the '{destination_workspace}' workspace.\n") + destination_lakehouse_id = fabric.create_lakehouse( + display_name=destination_lakehouse, workspace=destination_workspace + ) + print( + f"The '{destination_lakehouse}' lakehouse has been created within the '{destination_workspace}' workspace.\n" + ) else: - destination_lakehouse_id = dfI_filt['Id'].iloc[0] + destination_lakehouse_id = dfI_filt["Id"].iloc[0] # Source... - dfI_Source = fabric.list_items(workspace = workspace, type = 'SemanticModel') - dfI_filtSource = dfI_Source[(dfI_Source['Display Name'] == dataset)] - sourceLakehouseId = dfI_filtSource['Id'].iloc[0] + dfI_Source = fabric.list_items(workspace=workspace, type="SemanticModel") + dfI_filtSource = dfI_Source[(dfI_Source["Display Name"] == dataset)] + sourceLakehouseId = dfI_filtSource["Id"].iloc[0] # Valid tables - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, additional_xmla_properties=['Parent.SystemManaged']) - dfP_filt = dfP[(dfP['Mode'] == 'Import') & (dfP['Source Type'] != 'CalculationGroup') & (dfP['Parent System Managed'] == False)] - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - tmc = pd.DataFrame(dfP.groupby('Table Name')['Mode'].nunique()).reset_index() - oneMode = tmc[tmc['Mode'] == 1] - tableAll = dfP_filt[dfP_filt['Table Name'].isin(dfC['Table Name'].values) & (dfP_filt['Table Name'].isin(oneMode['Table Name'].values))] - tables = tableAll['Table Name'].unique() + dfP = fabric.list_partitions( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["Parent.SystemManaged"], + ) + dfP_filt = dfP[ + (dfP["Mode"] == "Import") + & (dfP["Source Type"] != "CalculationGroup") + & (dfP["Parent System Managed"] == False) + ] + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + tmc = pd.DataFrame(dfP.groupby("Table Name")["Mode"].nunique()).reset_index() + oneMode = tmc[tmc["Mode"] == 1] + tableAll = dfP_filt[ + dfP_filt["Table Name"].isin(dfC["Table Name"].values) + & (dfP_filt["Table Name"].isin(oneMode["Table Name"].values)) + ] + tables = tableAll["Table Name"].unique() client = fabric.FabricRestClient() print("Creating shortcuts...\n") - for tableName in tables: - tablePath = 'Tables/' + tableName - shortcutName = tableName.replace(' ','') + for tableName in tables: + tablePath = "Tables/" + tableName + shortcutName = tableName.replace(" ", "") request_body = { - "path": 'Tables', - "name": shortcutName, - "target": { - "oneLake": { - "workspaceId": workspace_id, - "itemId": sourceLakehouseId, - "path": tablePath} - } + "path": "Tables", + "name": shortcutName, + "target": { + "oneLake": { + "workspaceId": workspace_id, + "itemId": sourceLakehouseId, + "path": tablePath, + } + }, } try: - response = 
client.post(f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts",json=request_body) - if response.status_code == 201: - print(f"\u2022 The shortcut '{shortcutName}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace}' workspace. It is based on the '{tableName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.\n") + response = client.post( + f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts", + json=request_body, + ) + if response.status_code == 201: + print( + f"\u2022 The shortcut '{shortcutName}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace}' workspace. It is based on the '{tableName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.\n" + ) else: print(response.status_code) except: - print(f"ERROR: Failed to create a shortcut for the '{tableName}' table.") \ No newline at end of file + print( + f"ERROR: Failed to create a shortcut for the '{tableName}' table." + ) diff --git a/sempy_labs/QSO.py b/sempy_labs/QSO.py index a685e94b..10e74e98 100644 --- a/sempy_labs/QSO.py +++ b/sempy_labs/QSO.py @@ -1,16 +1,12 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import resolve_dataset_id +from ._helper_functions import resolve_dataset_id from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' def qso_sync(dataset: str, workspace: Optional[str] = None): - """ Triggers a query scale-out sync of read-only replicas for the specified dataset from the specified workspace. @@ -22,14 +18,13 @@ def qso_sync(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- - - """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group + """ + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group if workspace is None: workspace_id = fabric.get_workspace_id() @@ -40,15 +35,21 @@ def qso_sync(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) client = fabric.PowerBIRestClient() - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/sync") + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/sync" + ) if response.status_code == 200: - print(f"{green_dot} QSO sync initiated for the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.green_dot} QSO sync initiated for the '{dataset}' semantic model within the '{workspace}' workspace." + ) else: - print(f"{red_dot} QSO sync failed for the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.red_dot} QSO sync failed for the '{dataset}' semantic model within the '{workspace}' workspace." + ) -def qso_sync_status(dataset: str, workspace: Optional[str] = None): +def qso_sync_status(dataset: str, workspace: Optional[str] = None): """ Returns the query scale-out sync status for the specified dataset from the specified workspace. 
@@ -63,14 +64,28 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): Returns ------- - - """ - - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group - df = pd.DataFrame(columns=['Scale Out Status', 'Sync Start Time', 'Sync End Time', 'Commit Version', 'Commit Timestamp', 'Target Sync Version', 'Target Sync Timestamp', 'Trigger Reason', 'Min Active Read Version', 'Min Active Read Timestamp']) - dfRep = pd.DataFrame(columns=['Replica ID', 'Replica Type', 'Replica Version', 'Replica Timestamp']) + """ + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group + + df = pd.DataFrame( + columns=[ + "Scale Out Status", + "Sync Start Time", + "Sync End Time", + "Commit Version", + "Commit Timestamp", + "Target Sync Version", + "Target Sync Timestamp", + "Trigger Reason", + "Min Active Read Version", + "Min Active Read Timestamp", + ] + ) + dfRep = pd.DataFrame( + columns=["Replica ID", "Replica Type", "Replica Version", "Replica Timestamp"] + ) if workspace is None: workspace_id = fabric.get_workspace_id() @@ -81,29 +96,51 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) client = fabric.PowerBIRestClient() - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/syncStatus") + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/syncStatus" + ) if response.status_code == 200: o = response.json() - sos = o['scaleOutStatus'] - - if sos == 'Enabled': - new_data = {'Scale Out Status': o['scaleOutStatus'], 'Sync Start Time': o['syncStartTime'], 'Sync End Time': o['syncEndTime'], 'Commit Version': o['commitVersion'], 'Commit Timestamp': o['commitTimestamp'], 'Target Sync Version': o['targetSyncVersion'], 'Target Sync Timestamp': o['targetSyncTimestamp'], 'Trigger Reason': o['triggerReason'], 'Min Active Read Version': o['minActiveReadVersion'], 'Min Active Read Timestamp': o['minActiveReadTimestamp']} + sos = o["scaleOutStatus"] + + if sos == "Enabled": + new_data = { + "Scale Out Status": o["scaleOutStatus"], + "Sync Start Time": o["syncStartTime"], + "Sync End Time": o["syncEndTime"], + "Commit Version": o["commitVersion"], + "Commit Timestamp": o["commitTimestamp"], + "Target Sync Version": o["targetSyncVersion"], + "Target Sync Timestamp": o["targetSyncTimestamp"], + "Trigger Reason": o["triggerReason"], + "Min Active Read Version": o["minActiveReadVersion"], + "Min Active Read Timestamp": o["minActiveReadTimestamp"], + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for r in o['scaleOutReplicas']: - new_data = {'Replica ID': r['replicaId'], 'Replica Type': r['replicaType'], 'Replica Version': str(r['replicaVersion']), 'Replica Timestamp': r['replicaTimestamp']} - dfRep = pd.concat([dfRep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Sync Start Time'] = pd.to_datetime(df['Sync Start Time']) - df['Sync End Time'] = pd.to_datetime(df['Sync End Time']) - df['Commit Timestamp'] = pd.to_datetime(df['Commit Timestamp']) - df['Target Sync Timestamp'] = pd.to_datetime(df['Target Sync Timestamp']) - df['Min Active Read Timestamp'] = pd.to_datetime(df['Min Active Read Timestamp']) - dfRep['Replica Timestamp'] = pd.to_datetime(dfRep['Replica Timestamp']) - df['Commit Version'] = df['Commit Version'].astype('int') - df['Target Sync Version'] = df['Target Sync Version'].astype('int') - df['Min 
Active Read Version'] = df['Min Active Read Version'].astype('int') + for r in o["scaleOutReplicas"]: + new_data = { + "Replica ID": r["replicaId"], + "Replica Type": r["replicaType"], + "Replica Version": str(r["replicaVersion"]), + "Replica Timestamp": r["replicaTimestamp"], + } + dfRep = pd.concat( + [dfRep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + df["Sync Start Time"] = pd.to_datetime(df["Sync Start Time"]) + df["Sync End Time"] = pd.to_datetime(df["Sync End Time"]) + df["Commit Timestamp"] = pd.to_datetime(df["Commit Timestamp"]) + df["Target Sync Timestamp"] = pd.to_datetime(df["Target Sync Timestamp"]) + df["Min Active Read Timestamp"] = pd.to_datetime( + df["Min Active Read Timestamp"] + ) + dfRep["Replica Timestamp"] = pd.to_datetime(dfRep["Replica Timestamp"]) + df["Commit Version"] = df["Commit Version"].astype("int") + df["Target Sync Version"] = df["Target Sync Version"].astype("int") + df["Min Active Read Version"] = df["Min Active Read Version"].astype("int") return df, dfRep else: @@ -112,8 +149,8 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): else: return response.status_code -def disable_qso(dataset: str, workspace: Optional[str] = None): +def disable_qso(dataset: str, workspace: Optional[str] = None): """ Sets the max read-only replicas to 0, disabling query scale out. @@ -128,7 +165,7 @@ def disable_qso(dataset: str, workspace: Optional[str] = None): Returns ------- - + """ if workspace is None: @@ -139,23 +176,28 @@ def disable_qso(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) - request_body = { - "queryScaleOutSettings": { - "maxReadOnlyReplicas": '0' - } - } + request_body = {"queryScaleOutSettings": {"maxReadOnlyReplicas": "0"}} client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json=request_body + ) if response.status_code == 200: - df = list_qso_settings(dataset = dataset, workspace = workspace) - print(f"{green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace.") + df = list_qso_settings(dataset=dataset, workspace=workspace) + print( + f"{icons.green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace." + ) return df else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" -def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replicas: Optional[int] = -1, workspace: Optional[str] = None): +def set_qso( + dataset: str, + auto_sync: Optional[bool] = True, + max_read_only_replicas: Optional[int] = -1, + workspace: Optional[str] = None, +): """ Sets the query scale out settings for a semantic model. 
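Taken together, qso_sync and qso_sync_status give a trigger-then-poll workflow: kick off a replica sync, then inspect its progress. A usage sketch, assuming the functions are imported from the sempy_labs.QSO module shown in this diff and run in a Fabric notebook; the dataset and workspace names are placeholders.

```python
from sempy_labs.QSO import qso_sync, qso_sync_status

dataset = "Sales Model"   # placeholder semantic model name
workspace = "Analytics"   # placeholder workspace name

# Trigger a read-only replica sync...
qso_sync(dataset=dataset, workspace=workspace)

# ...then inspect its status. When scale-out is enabled the function returns
# two dataframes: the overall sync status and one row per replica.
result = qso_sync_status(dataset=dataset, workspace=workspace)
if isinstance(result, tuple):
    df_status, df_replicas = result
    print(df_status[["Scale Out Status", "Sync Start Time", "Sync End Time"]])
    print(df_replicas)
```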
@@ -174,10 +216,10 @@ def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replic Returns ------- - + """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group if workspace is None: workspace_id = fabric.get_workspace_id() @@ -188,34 +230,46 @@ def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replic dataset_id = resolve_dataset_id(dataset, workspace) if max_read_only_replicas == 0: - disable_qso(dataset = dataset, workspace = workspace) + disable_qso(dataset=dataset, workspace=workspace) return request_body = { "queryScaleOutSettings": { "autoSyncReadOnlyReplicas": auto_sync, - "maxReadOnlyReplicas": str(max_read_only_replicas) + "maxReadOnlyReplicas": str(max_read_only_replicas), } } - ssm = set_semantic_model_storage_format(dataset = dataset, storage_format='Large', workspace=workspace) + ssm = set_semantic_model_storage_format( + dataset=dataset, storage_format="Large", workspace=workspace + ) if ssm == 200: client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", + json=request_body, + ) if response.status_code == 200: - df = list_qso_settings(dataset = dataset, workspace = workspace) - print(f"{green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace.") + df = list_qso_settings(dataset=dataset, workspace=workspace) + print( + f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace." + ) return df else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" else: - print(f"{red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out.") - print("https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites") + print( + f"{icons.red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out." + ) + print( + "https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites" + ) return - -def set_semantic_model_storage_format(dataset: str, storage_format: str, workspace: Optional[str] = None): +def set_semantic_model_storage_format( + dataset: str, storage_format: str, workspace: Optional[str] = None +): """ Sets the semantic model storage format. 
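As the body above shows, set_qso first switches the model to large storage format and then patches the query scale-out settings, while disable_qso is the shortcut for turning scale-out back off. A minimal sketch of how they might be called, with placeholder names:

```python
from sempy_labs.QSO import set_qso, disable_qso

# Enable query scale-out with automatic replica sync and an unlimited
# replica count (-1, the default); returns the resulting QSO settings.
df_settings = set_qso(
    dataset="Sales Model",      # placeholder
    auto_sync=True,
    max_read_only_replicas=-1,
    workspace="Analytics",      # placeholder
)

# Setting max_read_only_replicas to 0 (or calling disable_qso directly)
# disables query scale-out again.
disable_qso(dataset="Sales Model", workspace="Analytics")
```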
@@ -232,7 +286,7 @@ def set_semantic_model_storage_format(dataset: str, storage_format: str, workspa Returns ------- - + """ if workspace is None: @@ -245,35 +299,37 @@ def set_semantic_model_storage_format(dataset: str, storage_format: str, workspa storage_format = storage_format.capitalize() - if storage_format == 'Abf': - storage_format = 'Small' - elif storage_format.startswith('Premium'): - storage_format = 'Large' + if storage_format == "Abf": + storage_format = "Small" + elif storage_format.startswith("Premium"): + storage_format = "Large" - storageFormats = ['Small', 'Large'] + storageFormats = ["Small", "Large"] - if storage_format == 'Large': - request_body = { - "targetStorageMode": "PremiumFiles" - } - elif storage_format == 'Small': - request_body = { - "targetStorageMode": "Abf" - } + if storage_format == "Large": + request_body = {"targetStorageMode": "PremiumFiles"} + elif storage_format == "Small": + request_body = {"targetStorageMode": "Abf"} else: - print(f"{red_dot} Invalid storage format value. Valid options: {storageFormats}.") + print( + f"{icons.red_dot} Invalid storage format value. Valid options: {storageFormats}." + ) return client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json=request_body + ) if response.status_code == 200: - return print(f"{green_dot} Semantic model storage format set to '{storage_format}'.") + return print( + f"{icons.green_dot} Semantic model storage format set to '{storage_format}'." + ) else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" -def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = None): +def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = None): """ Shows the query scale out settings for a semantic model (or all semantic models within a workspace). 
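set_semantic_model_storage_format, as reformatted above, normalizes the storage_format argument ('Abf' becomes 'Small', anything starting with 'Premium' becomes 'Large') before issuing the PATCH. A usage sketch with placeholder names:

```python
from sempy_labs.QSO import set_semantic_model_storage_format

# 'Large', 'PremiumFiles', 'Small' and 'Abf' are all accepted spellings;
# internally the request body becomes {"targetStorageMode": "PremiumFiles"}
# for large format and {"targetStorageMode": "Abf"} for small format.
set_semantic_model_storage_format(
    dataset="Sales Model",   # placeholder
    storage_format="Large",
    workspace="Analytics",   # placeholder
)
```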
@@ -302,28 +358,48 @@ def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = dataset_id = resolve_dataset_id(dataset, workspace) workspace_id = fabric.get_workspace_id() - df = pd.DataFrame(columns=['Dataset Id', 'Dataset Name', 'Storage Mode', 'QSO Auto Sync Enabled', 'QSO Max Read Only Replicas']) + df = pd.DataFrame( + columns=[ + "Dataset Id", + "Dataset Name", + "Storage Mode", + "QSO Auto Sync Enabled", + "QSO Max Read Only Replicas", + ] + ) client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/groups/{workspace_id}/datasets") - for v in response.json()['value']: - tsm = v['targetStorageMode'] - if tsm == 'Abf': - sm = 'Small' + for v in response.json()["value"]: + tsm = v["targetStorageMode"] + if tsm == "Abf": + sm = "Small" else: - sm = 'Large' - new_data = {'Dataset Id': v['id'], 'Dataset Name': v['name'], 'Storage Mode': sm, 'QSO Auto Sync Enabled': v['queryScaleOutSettings']['autoSyncReadOnlyReplicas'], 'QSO Max Read Only Replicas': v['queryScaleOutSettings']['maxReadOnlyReplicas'] } + sm = "Large" + new_data = { + "Dataset Id": v["id"], + "Dataset Name": v["name"], + "Storage Mode": sm, + "QSO Auto Sync Enabled": v["queryScaleOutSettings"][ + "autoSyncReadOnlyReplicas" + ], + "QSO Max Read Only Replicas": v["queryScaleOutSettings"][ + "maxReadOnlyReplicas" + ], + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['QSO Auto Sync Enabled'] = df['QSO Auto Sync Enabled'].astype('bool') - df['QSO Max Read Only Replicas'] = df['QSO Max Read Only Replicas'].astype('int') - + df["QSO Auto Sync Enabled"] = df["QSO Auto Sync Enabled"].astype("bool") + df["QSO Max Read Only Replicas"] = df["QSO Max Read Only Replicas"].astype("int") + if dataset is not None: - df = df[df['Dataset Id'] == dataset_id] - + df = df[df["Dataset Id"] == dataset_id] + return df -def set_workspace_default_storage_format(storage_format: str, workspace: Optional[str] = None): +def set_workspace_default_storage_format( + storage_format: str, workspace: Optional[str] = None +): """ Sets the default storage format for semantic models within a workspace. @@ -338,17 +414,19 @@ def set_workspace_default_storage_format(storage_format: str, workspace: Optiona Returns ------- - + """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/groups/update-group#defaultdatasetstorageformat + # https://learn.microsoft.com/en-us/rest/api/power-bi/groups/update-group#defaultdatasetstorageformat - storageFormats = ['Small', 'Large'] + storageFormats = ["Small", "Large"] storage_format = storage_format.capitalize() if storage_format not in storageFormats: - print(f"Invalid storage format. Please choose from these options: {storageFormats}.") + print( + f"Invalid storage format. Please choose from these options: {storageFormats}." 
+ ) if workspace is None: workspace_id = fabric.get_workspace_id() @@ -356,15 +434,14 @@ def set_workspace_default_storage_format(storage_format: str, workspace: Optiona else: workspace_id = fabric.resolve_workspace_id(workspace) - request_body = { - "name": workspace, - "defaultDatasetStorageFormat": storage_format - } + request_body = {"name": workspace, "defaultDatasetStorageFormat": storage_format} client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}", json = request_body) + response = client.patch(f"/v1.0/myorg/groups/{workspace_id}", json=request_body) if response.status_code == 200: - print(f"{green_dot} The default storage format for the '{workspace}' workspace has been updated to '{storage_format}.") + print( + f"{icons.green_dot} The default storage format for the '{workspace}' workspace has been updated to '{storage_format}." + ) else: - print(f"{red_dot} {response.status_code}") \ No newline at end of file + print(f"{icons.red_dot} {response.status_code}") diff --git a/sempy_labs/RefreshCalcTables.py b/sempy_labs/RefreshCalcTables.py index 010e9a0c..3fe8d733 100644 --- a/sempy_labs/RefreshCalcTables.py +++ b/sempy_labs/RefreshCalcTables.py @@ -6,15 +6,11 @@ from .TOM import connect_semantic_model from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' @log def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): - """ Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model. @@ -26,17 +22,12 @@ def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
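Stepping back to the QSO.py changes above before continuing with RefreshCalcTables.py: list_qso_settings returns one row per semantic model (or a single row when a dataset is given) with its storage mode and scale-out settings, and set_workspace_default_storage_format changes the workspace-wide default. A short sketch, again with placeholder names:

```python
from sempy_labs.QSO import list_qso_settings, set_workspace_default_storage_format

# All models in the workspace, with storage mode and QSO replica settings.
df_qso = list_qso_settings(workspace="Analytics")   # placeholder workspace
print(df_qso[["Dataset Name", "Storage Mode", "QSO Max Read Only Replicas"]])

# Restrict the output to a single model.
df_one = list_qso_settings(dataset="Sales Model", workspace="Analytics")

# Make large ('PremiumFiles') storage the default for new models in the workspace.
set_workspace_default_storage_format(storage_format="Large", workspace="Analytics")
```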
- - Returns - ------- - """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - spark = SparkSession.builder.getOrCreate() start_time = datetime.datetime.now() @@ -45,66 +36,98 @@ def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): while not success: try: - with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom: + with connect_semantic_model( + dataset=dataset, readonly=True, workspace=workspace + ) as tom: success = True for a in tom.model.Annotations: if any(a.Name == t.Name for t in tom.model.Tables): tName = a.Name query = a.Value - - if not query.startswith('EVALUATE'): - daxquery = 'EVALUATE \n' + query + + if not query.startswith("EVALUATE"): + daxquery = "EVALUATE \n" + query else: daxquery = query try: - df = fabric.evaluate_dax(dataset = dataset, dax_string = daxquery, workspace = workspace) + df = fabric.evaluate_dax( + dataset=dataset, + dax_string=daxquery, + workspace=workspace, + ) # Update column names for non-field parameters - if query.find('NAMEOF') == -1: + if query.find("NAMEOF") == -1: for old_column_name in df.columns: pattern = r"\[([^\]]+)\]" - - matches = re.findall(pattern, old_column_name) + + matches = re.findall(pattern, old_column_name) new_column_name = matches[0] - new_column_name = new_column_name.replace(' ','') - - df.rename(columns={old_column_name: new_column_name}, inplace=True) + new_column_name = new_column_name.replace(" ", "") + + df.rename( + columns={old_column_name: new_column_name}, + inplace=True, + ) # Update data types for lakehouse columns - dataType = next(str(c.DataType) for c in tom.all_columns() if c.Parent.Name == tName and c.SourceColumn == new_column_name) - #dfC_type = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == new_column_name)] - #dataType = dfC_type['Data Type'].iloc[0] - - if dataType == 'Int64': - df[new_column_name] = df[new_column_name].astype(int) - elif dataType in ['Decimal', 'Double']: - df[new_column_name] = df[new_column_name].astype(float) - elif dataType == 'Boolean': - df[new_column_name] = df[new_column_name].astype(bool) - elif dataType == 'DateTime': - df[new_column_name] = pd.to_datetime(df[new_column_name]) + dataType = next( + str(c.DataType) + for c in tom.all_columns() + if c.Parent.Name == tName + and c.SourceColumn == new_column_name + ) + # dfC_type = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == new_column_name)] + # dataType = dfC_type['Data Type'].iloc[0] + + if dataType == "Int64": + df[new_column_name] = df[ + new_column_name + ].astype(int) + elif dataType in ["Decimal", "Double"]: + df[new_column_name] = df[ + new_column_name + ].astype(float) + elif dataType == "Boolean": + df[new_column_name] = df[ + new_column_name + ].astype(bool) + elif dataType == "DateTime": + df[new_column_name] = pd.to_datetime( + df[new_column_name] + ) else: - df[new_column_name] = df[new_column_name].astype(str) - #else: + df[new_column_name] = df[ + new_column_name + ].astype(str) + # else: # second_column_name = df.columns[1] # third_column_name = df.columns[2] # df[third_column_name] = df[third_column_name].astype(int) - # Remove calc columns from field parameters + # Remove calc columns from field parameters # mask = df[second_column_name].isin(dfC_filt['Full Column Name']) # df = df[~mask] - delta_table_name = tName.replace(' ','_') - print(f"{in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress...") + delta_table_name = tName.replace(" ", 
"_") + print( + f"{icons.in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress..." + ) spark_df = spark.createDataFrame(df) - spark_df.write.mode('overwrite').format('delta').saveAsTable(delta_table_name) - print(f"{green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse.") + spark_df.write.mode("overwrite").format( + "delta" + ).saveAsTable(delta_table_name) + print( + f"{icons.green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse." + ) except: - print(f"{red_dot} Failed to create calculated table '{tName}' as a delta table in the lakehouse.") + print( + f"{icons.red_dot} Failed to create calculated table '{tName}' as a delta table in the lakehouse." + ) except Exception as e: if datetime.datetime.now() - start_time > timeout: break - time.sleep(1) \ No newline at end of file + time.sleep(1) diff --git a/sempy_labs/RefreshSemanticModel.py b/sempy_labs/RefreshSemanticModel.py index 599bbb7f..747919fa 100644 --- a/sempy_labs/RefreshSemanticModel.py +++ b/sempy_labs/RefreshSemanticModel.py @@ -1,18 +1,22 @@ import sempy import sempy.fabric as fabric import time -from .HelperFunctions import resolve_dataset_id +from ._helper_functions import resolve_dataset_id from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' @log -def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] = None, partitions: Optional[Union[str, List[str]]] = None, refresh_type: Optional[str] = None, retry_count: Optional[int] = 0, apply_refresh_policy: Optional[bool] = True, workspace: Optional[str] = None): - +def refresh_semantic_model( + dataset: str, + tables: Optional[Union[str, List[str]]] = None, + partitions: Optional[Union[str, List[str]]] = None, + refresh_type: Optional[str] = None, + retry_count: Optional[int] = 0, + apply_refresh_policy: Optional[bool] = True, + workspace: Optional[str] = None, +): """ Refreshes a semantic model. @@ -37,7 +41,7 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] Returns ------- - + """ if workspace == None: @@ -45,7 +49,7 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] workspace = fabric.resolve_workspace_name(workspace_id) if refresh_type is None: - refresh_type = 'full' + refresh_type = "full" if isinstance(tables, str): tables = [tables] @@ -57,6 +61,7 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] if tables is not None: objects = objects + [{"table": table} for table in tables] if partitions is not None: + def extract_names(partition): parts = partition.split("[") table_name = parts[0].strip("'") @@ -65,43 +70,79 @@ def extract_names(partition): objects = objects + [extract_names(partition) for partition in partitions] - refresh_type = refresh_type.lower().replace('only', 'Only').replace('values', 'Values') + refresh_type = ( + refresh_type.lower().replace("only", "Only").replace("values", "Values") + ) - refreshTypes = ['full', 'automatic', 'dataOnly', 'calculate', 'clearValues', 'defragment'] + refreshTypes = [ + "full", + "automatic", + "dataOnly", + "calculate", + "clearValues", + "defragment", + ] if refresh_type not in refreshTypes: - print(f"{red_dot} Invalid refresh type. 
Refresh type must be one of these values: {refreshTypes}.") + print( + f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}." + ) return - + if len(objects) == 0: - requestID = fabric.refresh_dataset(dataset = dataset, workspace = workspace, refresh_type = refresh_type, retry_count = retry_count, apply_refresh_policy = apply_refresh_policy) + requestID = fabric.refresh_dataset( + dataset=dataset, + workspace=workspace, + refresh_type=refresh_type, + retry_count=retry_count, + apply_refresh_policy=apply_refresh_policy, + ) else: - requestID = fabric.refresh_dataset(dataset = dataset, workspace = workspace, refresh_type = refresh_type, retry_count = retry_count, apply_refresh_policy = apply_refresh_policy, objects = objects) - print(f"{in_progress} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is in progress...") + requestID = fabric.refresh_dataset( + dataset=dataset, + workspace=workspace, + refresh_type=refresh_type, + retry_count=retry_count, + apply_refresh_policy=apply_refresh_policy, + objects=objects, + ) + print( + f"{icons.in_progress} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is in progress..." + ) if len(objects) != 0: print(objects) while True: - requestDetails = fabric.get_refresh_execution_details(dataset = dataset,refresh_request_id = requestID, workspace = workspace) + requestDetails = fabric.get_refresh_execution_details( + dataset=dataset, refresh_request_id=requestID, workspace=workspace + ) status = requestDetails.status # Check if the refresh has completed - if status == 'Completed': + if status == "Completed": break - elif status == 'Failed': - print(f"{red_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has failed.") + elif status == "Failed": + print( + f"{icons.red_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has failed." + ) return - elif status == 'Cancelled': - print(f"{yellow_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled.") + elif status == "Cancelled": + print( + f"{icons.yellow_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled." + ) return time.sleep(3) - print(f"{green_dot} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is complete.") + print( + f"{icons.green_dot} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is complete." + ) -@log -def cancel_dataset_refresh(dataset: str, request_id: Optional[str] = None, workspace: Optional[str] = None): +@log +def cancel_dataset_refresh( + dataset: str, request_id: Optional[str] = None, workspace: Optional[str] = None +): """ Cancels the refresh of a semantic model which was executed via the [Enhanced Refresh API](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh). @@ -110,7 +151,7 @@ def cancel_dataset_refresh(dataset: str, request_id: Optional[str] = None, works dataset : str Name of the semantic model. request_id : str, default=None - The request id of a semantic model refresh. + The request id of a semantic model refresh. Defaults to finding the latest active refresh of the semantic model. workspace : str, default=None The Fabric workspace name. 
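refresh_semantic_model, shown above, builds an objects list from the tables and partitions arguments (partitions written as 'Table Name'[Partition Name] are split apart) and then polls get_refresh_execution_details until the request completes, fails, or is cancelled. A usage sketch with placeholder names, assuming the function is imported from the sempy_labs.RefreshSemanticModel module in this diff:

```python
from sempy_labs.RefreshSemanticModel import refresh_semantic_model

# Full refresh of the whole model (refresh_type defaults to 'full').
refresh_semantic_model(dataset="Sales Model", workspace="Analytics")  # placeholders

# Refresh only specific objects: plain table names plus partitions written
# in 'Table Name'[Partition Name] form, which the function splits apart.
refresh_semantic_model(
    dataset="Sales Model",
    tables=["DimDate"],
    partitions=["'FactSales'[FactSales - 2024]"],
    refresh_type="dataOnly",
    workspace="Analytics",
)
```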
@@ -119,31 +160,36 @@ def cancel_dataset_refresh(dataset: str, request_id: Optional[str] = None, works Returns ------- - - """ + + """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) else: workspace_id = fabric.resolve_workspace_id(workspace) - - rr = fabric.list_refresh_requests(dataset = dataset, workspace = workspace) - rr_filt = rr[rr['Status'] == 'Unknown'] + + rr = fabric.list_refresh_requests(dataset=dataset, workspace=workspace) + rr_filt = rr[rr["Status"] == "Unknown"] if request_id == None: if len(rr_filt) == 0: - print(f"{red_dot} There are no active Enhanced API refreshes of the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.red_dot} There are no active Enhanced API refreshes of the '{dataset}' semantic model within the '{workspace}' workspace." + ) return - request_id = rr_filt['Request Id'].iloc[0] - - dataset_id = resolve_dataset_id(dataset = dataset, workspace = workspace) + request_id = rr_filt["Request Id"].iloc[0] + + dataset_id = resolve_dataset_id(dataset=dataset, workspace=workspace) client = fabric.PowerBIRestClient() - response = client.delete(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshes/{request_id}") + response = client.delete( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshes/{request_id}" + ) if response.status_code == 200: - print(f"{green_dot} The '{request_id}' refresh request for the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled.") + print( + f"{icons.green_dot} The '{request_id}' refresh request for the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled." + ) else: print(response.status_code) - diff --git a/sempy_labs/ReportFunctions.py b/sempy_labs/ReportFunctions.py deleted file mode 100644 index 6c6d3b52..00000000 --- a/sempy_labs/ReportFunctions.py +++ /dev/null @@ -1,742 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, os, time, base64, copy, re -from anytree import Node, RenderTree -from powerbiclient import Report -from synapse.ml.services import Translate -from pyspark.sql.functions import col, flatten -from pyspark.sql import SparkSession -from .GenerateReport import update_report_from_reportjson -from .Translations import language_validate -from .Lakehouse import lakehouse_attached -from .HelperFunctions import generate_embedded_filter, resolve_dataset_name, resolve_report_id, resolve_lakehouse_name -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def get_report_json(report: str, workspace: Optional[str] = None, save_to_file_name: Optional[str] = None): - - """ - Gets the report.json file content of a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - save_to_file_name : str, default=None - Specifying this parameter will save the report.json file to the lakehouse attached to the notebook with the file name of this parameter. - - Returns - ------- - str - The report.json file for a given Power BI report. 
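Referring back to cancel_dataset_refresh above: when no request_id is passed it looks up the newest refresh request still reporting an 'Unknown' (in-flight) status and issues the DELETE call against it. A minimal sketch of both calling styles, with placeholder names and an illustrative request id:

```python
from sempy_labs.RefreshSemanticModel import cancel_dataset_refresh

# Cancel the most recent in-flight Enhanced API refresh of the model.
cancel_dataset_refresh(dataset="Sales Model", workspace="Analytics")  # placeholders

# Or target a specific refresh request by its id (value is illustrative only).
cancel_dataset_refresh(
    dataset="Sales Model",
    request_id="00000000-0000-0000-0000-000000000000",
    workspace="Analytics",
)
```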
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - client = fabric.FabricRestClient() - - dfI = fabric.list_items(workspace = workspace, type = 'Report') - dfI_filt = dfI[(dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist in the '{workspace}' workspace.") - return - - itemId = dfI_filt['Id'].iloc[0] - response = client.post(f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition") - df_items = pd.json_normalize(response.json()['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'report.json'] - payload = df_items_filt['payload'].iloc[0] - - reportFile = base64.b64decode(payload).decode('utf-8') - reportJson = json.loads(reportFile) - - if save_to_file_name is not None: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"{red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") - return - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - folderPath = '/lakehouse/default/Files' - fileExt = '.json' - if not save_to_file_name.endswith(fileExt): - save_to_file_name = save_to_file_name + fileExt - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: - json.dump(reportJson, json_file, indent=4) - print(f"{green_dot} The report.json file for the '{report}' report has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n") - - return reportJson - -def report_dependency_tree(workspace: Optional[str] = None): - - """ - Prints a dependency between reports and semantic models. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspaceId = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspaceId) - - dfR = fabric.list_reports(workspace = workspace) - dfD = fabric.list_datasets(workspace = workspace) - dfR = pd.merge(dfR, dfD[['Dataset ID', 'Dataset Name']], left_on = 'Dataset Id', right_on = 'Dataset ID', how = 'left') - dfR.rename(columns={'Name': 'Report Name'}, inplace=True) - dfR = dfR[['Report Name', 'Dataset Name']] - - report_icon = '\U0001F4F6' - dataset_icon = '\U0001F9CA' - workspace_icon = '\U0001F465' - - node_dict = {} - rootNode = Node(workspace) - node_dict[workspace] = rootNode - rootNode.custom_property = workspace_icon + ' ' - - for i, r in dfR.iterrows(): - datasetName = r['Dataset Name'] - reportName = r['Report Name'] - parentNode = node_dict.get(datasetName) - if parentNode is None: - parentNode = Node(datasetName, parent = rootNode) - node_dict[datasetName] = parentNode - parentNode.custom_property = dataset_icon + ' ' - - child_node = Node(reportName, parent=parentNode) - child_node.custom_property = report_icon + ' ' - - # Print the tree structure - for pre, _, node in RenderTree(node_dict[workspace]): - print(f"{pre}{node.custom_property}'{node.name}'") - -@log -def export_report(report: str, export_format: str, file_name: Optional[str] = None, bookmark_name: Optional[str] = None, page_name: Optional[str] = None, visual_name: Optional[str] = None, report_filter: Optional[str] = None, workspace: Optional[str] = None): - - """ - Exports a Power BI report to a file in your lakehouse. - - Parameters - ---------- - report : str - Name of the Power BI report. - export_format : str - The format in which to export the report. See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'. - file_name : str, default=None - The name of the file to be saved within the lakehouse. Do not include the file extension. Defaults ot the reportName parameter value. - bookmark_name : str, default=None - The name (GUID) of a bookmark within the report. - page_name : str, default=None - The name (GUID) of the report page. - visual_name : str, default=None - The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter. - report_filter : str, default=None - A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - #https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group - - lakeAttach = lakehouse_attached() - - if lakeAttach == False: - print(f"{red_dot} In order to run the 'export_report' function, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if isinstance(page_name,str): - page_name = [page_name] - if isinstance(visual_name,str): - visual_name = [visual_name] - - if bookmark_name is not None and (page_name is not None or visual_name is not None): - print(f"{red_dot} If the 'bookmark_name' parameter is set, the 'page_name' and 'visual_name' parameters must not be set.") - return - if visual_name is not None and page_name is None: - print(f"{red_dot} If the 'visual_name' parameter is set, the 'page_name' parameter must be set.") - return - - validFormats = { - 'ACCESSIBLEPDF': '.pdf', - 'CSV': '.csv', - 'DOCX': '.docx', - 'MHTML': '.mhtml', - 'PDF': '.pdf', - 'PNG': '.png', - 'PPTX': '.pptx', - 'XLSX': '.xlsx', - 'XML': '.xml', - 'BMP': '.bmp', - 'EMF': '.emf', - 'GIF': '.gif', - 'JPEG': '.jpeg', - 'TIFF': '.tiff' - } - - export_format = export_format.upper() - if export_format not in validFormats: - print(f"{red_dot} The '{export_format}' format is not a valid format for exporting Power BI reports. Please enter a valid format. Options: {validFormats}") - return - - fileExt = validFormats.get(export_format) - - if file_name == None: - file_name = report + fileExt - else: - file_name = file_name + fileExt - - folderPath = '/lakehouse/default/Files' - filePath = os.path.join(folderPath, file_name) - - dfI = fabric.list_items(workspace = workspace) - dfI_filt = dfI[(dfI['Type'].isin(['Report', 'PaginatedReport'])) & (dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist in the '{workspace}' workspace.") - return - - reportType = dfI_filt['Type'].iloc[0] - - # Limitations - pbiOnly = ['PNG'] - paginatedOnly = ['ACCESSIBLEPDF','CSV','DOCX', 'BMP', 'EMF', 'GIF', 'JPEG', 'TIFF', 'MHTML', 'XLSX', 'XML'] - - if reportType == 'Report' and export_format in paginatedOnly: - print(f"{red_dot} The '{export_format}' format is only supported for paginated reports.") - return - if reportType == 'PaginatedReport' and export_format in pbiOnly: - print(f"{red_dot} The '{export_format}' format is only supported for Power BI reports.") - return - - if reportType == 'PaginatedReport' and (bookmark_name is not None or page_name is not None or visual_name is not None): - print(f"{red_dot} Export for paginated reports does not support bookmarks/pages/visuals. 
Those parameters must not be set for paginated reports.") - return - - reportId = dfI_filt['Id'].iloc[0] - client = fabric.PowerBIRestClient() - - dfVisual = list_report_visuals(report = report, workspace = workspace) - dfPage = list_report_pages(report = report, workspace = workspace) - - if export_format in ['BMP', 'EMF', 'GIF', 'JPEG', 'TIFF'] and reportType == 'PaginatedReport': - request_body = { - 'format': 'IMAGE', - 'paginatedReportConfiguration': { - 'formatSettings': { - 'OutputFormat': export_format.lower() - } - } - } - elif bookmark_name is None and page_name is None and visual_name is None: - request_body = { - 'format': export_format - } - elif bookmark_name is not None: - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - 'defaultBookmark': { - 'name': bookmark_name - } - } - } - elif page_name is not None and visual_name is None: - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - } - } - - request_body['powerBIReportConfiguration']['pages'] = [] - - for page in page_name: - dfPage_filt = dfPage[dfPage['Page ID'] == page] - if len(dfPage_filt) == 0: - print(f"{red_dot} The '{page}' page does not exist in the '{report}' report within the '{workspace}' workspace.") - return - page_dict = {'pageName': page} - request_body['powerBIReportConfiguration']['pages'].append(page_dict) - - elif page_name is not None and visual_name is not None: - if len(page_name) != len(visual_name): - print(f"{red_dot} Each 'visual_name' must map to a single 'page_name'.") - return - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - } - } - - request_body['powerBIReportConfiguration']['pages'] = [] - a=0 - for page in page_name: - visual = visual_name[a] - dfVisual_filt = dfVisual[(dfVisual['Page ID'] == page) & (dfVisual['Visual ID'] == visual)] - if len(dfVisual_filt) == 0: - print(f"{red_dot} The '{visual}' visual does not exist on the '{page}' in the '{report}' report within the '{workspace}' workspace.") - return - page_dict = {'pageName': page,'visualName': visual} - request_body['powerBIReportConfiguration']['pages'].append(page_dict) - a+=1 - - # Transform and add report filter if it is specified - if report_filter is not None and reportType == 'Report': - reportFilter = generate_embedded_filter(filter = report_filter) - report_level_filter = {'filter': reportFilter} - - if 'powerBIReportConfiguration' not in request_body: - request_body['powerBIReportConfiguration'] = {} - request_body['powerBIReportConfiguration']['reportLevelFilters'] = [report_level_filter] - print(request_body) - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/ExportTo",json=request_body) - if response.status_code == 202: - response_body = json.loads(response.content) - exportId = response_body['id'] - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}") - response_body = json.loads(response.content) - while response_body['status'] not in ['Succeeded', 'Failed']: - time.sleep(3) - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}") - response_body = json.loads(response.content) - if response_body['status'] == 'Failed': - print(f"{red_dot} The export for the '{report}' report within the '{workspace}' workspace in the '{export_format}' format has failed.") - else: - response = 
client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}/file") - print(f"{in_progress} Saving the '{export_format}' export for the '{report}' report within the '{workspace}' workspace to the lakehouse...") - with open(filePath, "wb") as export_file: - export_file.write(response.content) - print(f"{green_dot} The '{export_format}' export for the '{report}' report within the '{workspace}' workspace has been saved to the following location: '{filePath}'.") - - -def clone_report(report: str, cloned_report: str, workspace: Optional[str] = None, target_workspace: Optional[str] = None, target_dataset: Optional[str] = None): - - """ - Clones a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - cloned_report : str - Name of the new Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - target_workspace : str, default=None - The name of the Fabric workspace to place the cloned report. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - target_dataset : str, default=None - The name of the semantic model to be used by the cloned report. - Defaults to None which resolves to the semantic model used by the initial report. - - Returns - ------- - - """ - - #https://learn.microsoft.com/rest/api/power-bi/reports/clone-report-in-group - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - dfI = fabric.list_items(workspace = workspace, type = 'Report') - dfI_filt = dfI[(dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist within the '{workspace}' workspace.") - return - - reportId = resolve_report_id(report, workspace) - - if target_workspace is None: - target_workspace = workspace - target_workspace_id = workspace_id - else: - dfW = fabric.list_workspaces() - dfW_filt = dfW[dfW['Name'] == target_workspace] - - if len(dfW_filt) == 0: - print(f"{red_dot} The '{workspace}' is not a valid workspace.") - return - target_workspace_id = dfW_filt['Id'].iloc[0] - - if target_dataset == None: - dfR = fabric.list_reports(workspace = target_workspace) - dfR_filt = dfR[dfR['Name'] == report] - target_dataset_id = dfR_filt['Dataset Id'].iloc[0] - target_dataset = resolve_dataset_name(dataset_id = target_dataset_id, workspace = target_workspace) - else: - dfD = fabric.list_datasets(workspace = target_workspace) - dfD_filt = dfD[dfD['Dataset Name'] == target_dataset] - - if len(dfD_filt) == 0: - print(f"{red_dot} The '{target_dataset}' target dataset does not exist in the '{target_workspace}' workspace.") - return - target_dataset_id = dfD_filt['Dataset Id'].iloc[0] - - client = fabric.PowerBIRestClient() - - if target_workspace is None and target_dataset is None: - request_body = { - "name": cloned_report - } - elif target_workspace is not None and target_dataset is None: - request_body = { - "name": cloned_report, - "targetWorkspaceId": target_workspace_id - } - elif target_workspace is not None and target_dataset is not None: - request_body = { - "name": cloned_report, - "targetModelId": target_dataset_id, - "targetWorkspaceId": target_workspace_id - } - elif target_workspace is 
None and target_dataset is not None: - request_body = { - "name": cloned_report, - "targetModelId": target_dataset_id - } - - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/Clone",json=request_body) - - if response.status_code == 200: - print(f"{green_dot} The '{report}' report has been successfully cloned as the '{cloned_report}' report within the '{target_workspace}' workspace using the '{target_dataset}' semantic model.") - else: - print(f"{red_dot} POST request failed with status code: {response.status_code}") - -def launch_report(report: str, workspace: Optional[str] = None): - - """ - Shows a Power BI report within a Fabric notebook. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - str - An embedded Power BI report within the notebook. - """ - - from .HelperFunctions import resolve_report_id - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - reportId = resolve_report_id(report, workspace) - - report = Report(group_id=workspace_id, report_id=reportId) - - return report - -def list_report_pages(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all pages within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the pages within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - df = pd.DataFrame(columns=['Page ID', 'Page Name', 'Hidden', 'Width', 'Height', 'Visual Count']) - - reportJson = get_report_json(report = report, workspace = workspace) - - for section in reportJson['sections']: - pageID = section['name'] - pageName = section['displayName'] - #pageFilters = section['filters'] - pageWidth = section['width'] - pageHeight = section['height'] - visualCount = len(section['visualContainers']) - pageHidden = False - pageConfig = section['config'] - pageConfigJson = json.loads(pageConfig) - - try: - pageH = pageConfigJson['visibility'] - if pageH == 1: - pageHidden = True - except: - pass - - new_data = {'Page ID': pageID, 'Page Name': pageName, 'Hidden': pageHidden, 'Width': pageWidth, 'Height': pageHeight, 'Visual Count': visualCount} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Hidden'] = df['Hidden'].astype(bool) - intCol = ['Width', 'Height', 'Visual Count'] - df[intCol] = df[intCol].astype(int) - - return df - -def list_report_visuals(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all visuals within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the visuals within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - reportJson = get_report_json(report = report, workspace = workspace) - - df = pd.DataFrame(columns=['Page Name', 'Page ID', 'Visual ID', 'Title']) - - for section in reportJson['sections']: - pageID = section['name'] - pageName = section['displayName'] - - for visual in section['visualContainers']: - visualConfig = visual['config'] - visualConfigJson = json.loads(visualConfig) - visualID = visualConfigJson['name'] - - try: - title = visualConfigJson["singleVisual"]["vcObjects"]["title"][0]["properties"]["text"]["expr"]["Literal"]["Value"] - title = title[1:-1] - except: - title = '' - - new_data = {'Page Name': pageName, 'Page ID': pageID, 'Visual ID': visualID, 'Title': title} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - -def list_report_bookmarks(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all bookmarks within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the bookmarks within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - df = pd.DataFrame(columns=['Bookmark ID', 'Bookmark Name', 'Page ID', 'Visual ID', 'Visual Hidden']) - - reportJson = get_report_json(report = report, workspace = workspace) - reportConfig = reportJson['config'] - reportConfigJson = json.loads(reportConfig) - - try: - for bookmark in reportConfigJson['bookmarks']: - bID = bookmark['name'] - bName = bookmark['displayName'] - rptPageId = bookmark['explorationState']['activeSection'] - - for rptPg in bookmark['explorationState']['sections']: - for vc in bookmark['explorationState']['sections'][rptPg]['visualContainers']: - vHidden = False - try: - hidden = bookmark['explorationState']['sections'][rptPg]['visualContainers'][vc]['singleVisual']['display']['mode'] - if hidden == 'hidden': - vHidden = True - except: - pass - - new_data = {'Bookmark ID': bID, 'Bookmark Name': bName, 'Page ID': rptPageId, 'Visual ID': vc, 'Visual Hidden': vHidden } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - listPages = list_report_pages(report = report, workspace = workspace) - - df = pd.merge(df, listPages[['Page ID', 'Page Name']], on='Page ID', how='left') - df = df[['Bookmark ID', 'Bookmark Name', 'Page ID', 'Page Name', 'Visual ID', 'Visual Hidden']] - - return df - - except: - print(f"The '{report}' report within the '{workspace}' workspace has no bookmarks.") - -def translate_report_titles(report: str, languages: Union[str,List[str]], workspace: Optional[str] = None): - - """ - Dynamically generates new Power BI reports which have report titles translated into the specified language(s). - - Parameters - ---------- - report : str - Name of the Power BI report. - languages : str, List[str] - The language code(s) in which to translate the report titles. 
- workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if isinstance(languages, str): - languages = [languages] - - for lang in languages: - language_validate(lang) - - reportJson = get_report_json(report = report, workspace = workspace) - dfV = list_report_visuals(report = report, workspace = workspace) - spark = SparkSession.builder.getOrCreate() - df = spark.createDataFrame(dfV) - columnToTranslate = 'Title' - - translate = ( - Translate() - .setTextCol(columnToTranslate) - .setToLanguage(languages) - .setOutputCol("translation") - .setConcurrency(5) - ) - - transDF = (translate - .transform(df) - .withColumn("translation", flatten(col("translation.translations"))) - .withColumn("translation", col("translation.text")) - .select('Visual ID', columnToTranslate, 'translation')) - - df_panda = transDF.toPandas() - - i=0 - for lang in languages: - #Clone report - language = language_validate(lang) - clonedReportName = f"{report}_{language}" - - dfRep = fabric.list_reports(workspace = workspace) - dfRep_filt = dfRep[(dfRep['Name'] == clonedReportName) & (dfRep['Report Type'] == 'PowerBIReport')] - - if len(dfRep_filt) > 0: - print(f"{yellow_dot} The '{clonedReportName}' report already exists in the '{workspace} workspace.") - else: - clone_report(report = report, cloned_report = clonedReportName, workspace = workspace) - print(f"{green_dot} The '{clonedReportName}' report has been created via clone in the '{workspace} workspace.") - - rptJsonTr = copy.deepcopy(reportJson) - - # Update report json file - for section in rptJsonTr['sections']: - for visual in section['visualContainers']: - visualConfig = visual['config'] - visualConfigJson = json.loads(visualConfig) - visualID = visualConfigJson['name'] - - df_filt = df_panda[(df_panda['Visual ID'] == visualID) & (df_panda['Title'] != '')] - - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - if len(tr) > 0: - prop = visualConfigJson["singleVisual"]["vcObjects"]["title"][0]["properties"]["text"]["expr"]["Literal"] - prop['Value'] = f"'{tr}'" - - visual['config'] = json.dumps(visualConfigJson) - - i+=1 - - # Post updated report json file to cloned report - update_report_from_reportjson(report = clonedReportName, report_json = rptJsonTr, workspace = workspace) - print(f"{green_dot} The visual titles within the '{clonedReportName}' report within the '{workspace}' have been translated into '{language}' accordingly.") - - - - - - \ No newline at end of file diff --git a/sempy_labs/ShowUnsupportedDirectLakeObjects.py b/sempy_labs/ShowUnsupportedDirectLakeObjects.py deleted file mode 100644 index 0f4277a0..00000000 --- a/sempy_labs/ShowUnsupportedDirectLakeObjects.py +++ /dev/null @@ -1,68 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .ListFunctions import list_tables -from .HelperFunctions import format_dax_object_name -from typing import List, Optional, Union - -def show_unsupported_direct_lake_objects(dataset: str, workspace: Optional[str] = None): - - """ - Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. 
- Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame, pandas.DataFrame, pandas.DataFrame - 3 pandas dataframes showing objects in a semantic model which are not supported by Direct Lake. - """ - - pd.options.mode.chained_assignment = None - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dfT = list_tables(dataset, workspace) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - - # Calc tables - dfT_filt = dfT[dfT['Type'] == 'Calculated Table'] - dfT_filt.rename(columns={'Name': 'Table Name'}, inplace=True) - t = dfT_filt[['Table Name', 'Type']] - - # Calc columns - dfC_filt = dfC[(dfC['Type'] == 'Calculated') | (dfC['Data Type'] == 'Binary')] - c = dfC_filt[['Table Name', 'Column Name', 'Type', 'Data Type', 'Source']] - - # Relationships - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - merged_from = pd.merge(dfR, dfC, left_on='From Object', right_on='Column Object', how='left') - merged_to = pd.merge(dfR, dfC, left_on='To Object', right_on='Column Object', how='left') - - dfR['From Column Data Type'] = merged_from['Data Type'] - dfR['To Column Data Type'] = merged_to['Data Type'] - - dfR_filt = dfR[((dfR['From Column Data Type'] == 'DateTime') | (dfR['To Column Data Type'] == 'DateTime')) | (dfR['From Column Data Type'] != dfR['To Column Data Type'])] - r = dfR_filt[['From Table', 'From Column', 'To Table', 'To Column', 'From Column Data Type', 'To Column Data Type']] - - #print('Calculated Tables are not supported...') - #display(t) - #print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations") - #print('Calculated columns are not supported. 
Columns of binary data type are not supported.') - #display(c) - #print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.') - #display(r) - - return t, c, r \ No newline at end of file diff --git a/sempy_labs/TOM.py b/sempy_labs/TOM.py index d5a18fff..0237a81c 100644 --- a/sempy_labs/TOM.py +++ b/sempy_labs/TOM.py @@ -3,30 +3,30 @@ import pandas as pd import re from datetime import datetime -from .HelperFunctions import format_dax_object_name -from .ListFunctions import list_relationships +from ._helper_functions import format_dax_object_name +from ._list_functions import list_relationships from .RefreshSemanticModel import refresh_semantic_model -from .Fallback import check_fallback_reason +from ._fallback import check_fallback_reason from contextlib import contextmanager from typing import List, Optional, Union, TYPE_CHECKING from sempy._utils._log import log +import sempy_labs._icons as icons if TYPE_CHECKING: import Microsoft.AnalysisServices.Tabular -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' -checked = '\u2611' -unchecked = '\u2610' -start_bold = '\033[1m' -end_bold = '\033[0m' + +checked = "\u2611" +unchecked = "\u2610" +start_bold = "\033[1m" +end_bold = "\033[0m" + @log @contextmanager -def connect_semantic_model(dataset: str, readonly: Optional[bool] = True, workspace: Optional[str] = None): - +def connect_semantic_model( + dataset: str, readonly: Optional[bool] = True, workspace: Optional[str] = None +): """ Connects to the Tabular Object Model (TOM) within a semantic model. @@ -54,18 +54,19 @@ def connect_semantic_model(dataset: str, readonly: Optional[bool] = True, worksp if workspace is None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - + fpAdded = [] class TOMWrapper: def __init__(self, dataset, workspace, readonly): - - tom_server = fabric.create_tom_server(readonly=readonly, workspace=workspace) + + tom_server = fabric.create_tom_server( + readonly=readonly, workspace=workspace + ) self.model = tom_server.Databases.GetByName(dataset).Model def all_columns(self): - """ Outputs a list of all columns within all tables in the semantic model. @@ -84,7 +85,6 @@ def all_columns(self): yield c def all_calculated_columns(self): - """ Outputs a list of all calculated columns within all tables in the semantic model. @@ -103,7 +103,6 @@ def all_calculated_columns(self): yield c def all_calculated_tables(self): - """ Outputs a list of all calculated tables in the semantic model. @@ -121,7 +120,6 @@ def all_calculated_tables(self): yield t def all_calculation_groups(self): - """ Outputs a list of all calculation groups in the semantic model. @@ -139,7 +137,6 @@ def all_calculation_groups(self): yield t def all_measures(self): - """ Outputs a list of all measures in the semantic model. @@ -157,7 +154,6 @@ def all_measures(self): yield m def all_partitions(self): - """ Outputs a list of all partitions in the semantic model. @@ -175,7 +171,6 @@ def all_partitions(self): yield p def all_hierarchies(self): - """ Outputs a list of all hierarchies in the semantic model. @@ -193,7 +188,6 @@ def all_hierarchies(self): yield h def all_levels(self): - """ Outputs a list of all levels in the semantic model. @@ -212,7 +206,6 @@ def all_levels(self): yield l def all_calculation_items(self): - """ Outputs a list of all calculation items in the semantic model. 
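[Illustrative usage sketch, not part of the patch. The import path is assumed from the sempy_labs/TOM.py file this patch touches, the dataset and workspace names are hypothetical, and it is assumed that connect_semantic_model yields the TOMWrapper instance shown above.]

    from sempy_labs.TOM import connect_semantic_model  # assumed import path

    # Open a read-only TOM connection and enumerate objects with the iterator helpers.
    with connect_semantic_model(dataset="AdventureWorks", readonly=True, workspace="Sales") as tom:
        for m in tom.all_measures():
            print(m.Name)
        for c in tom.all_calculated_columns():
            print(c.Parent.Name, c.Name)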
@@ -231,7 +224,6 @@ def all_calculation_items(self): yield ci def all_rls(self): - """ Outputs a list of all row level security expressions in the semantic model. @@ -248,8 +240,16 @@ def all_rls(self): for tp in r.TablePermissions: yield tp - def add_measure(self, table_name: str, measure_name: str, expression: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None): - + def add_measure( + self, + table_name: str, + measure_name: str, + expression: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + ): """ Adds a measure to the semantic model. @@ -276,7 +276,7 @@ def add_measure(self, table_name: str, measure_name: str, expression: str, forma """ obj = TOM.Measure() - obj.Name= measure_name + obj.Name = measure_name obj.Expression = expression obj.IsHidden = hidden if format_string is not None: @@ -288,8 +288,20 @@ def add_measure(self, table_name: str, measure_name: str, expression: str, forma self.model.Tables[table_name].Measures.Add(obj) - def add_calculated_table_column(self, table_name: str, column_name: str, source_column: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - + def add_calculated_table_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): """ Adds a calculated table column to a calculated table within a semantic model. 
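[Illustrative sketch of the reformatted add_measure signature, assuming an open writable connection held in tom (readonly=False); the table, measure, and DAX references are hypothetical.]

    tom.add_measure(
        table_name="Sales",
        measure_name="Total Sales Amount",
        expression="SUM(Sales[SalesAmount])",
        format_string="#,0.00",
        display_folder="Base Measures",
    )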
@@ -324,10 +336,18 @@ def add_calculated_table_column(self, table_name: str, column_name: str, source_ """ - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) obj = TOM.CalculatedTableColumn() obj.Name = column_name @@ -346,8 +366,20 @@ def add_calculated_table_column(self, table_name: str, column_name: str, source_ obj.DataCategory = data_category self.model.Tables[table_name].Columns.Add(obj) - def add_data_column(self, table_name: str, column_name: str, source_column: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - + def add_data_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): """ Adds a data column to a table within a semantic model. @@ -382,10 +414,18 @@ def add_data_column(self, table_name: str, column_name: str, source_column: str, """ - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) obj = TOM.DataColumn() obj.Name = column_name @@ -404,8 +444,20 @@ def add_data_column(self, table_name: str, column_name: str, source_column: str, obj.DataCategory = data_category self.model.Tables[table_name].Columns.Add(obj) - def add_calculated_column(self, table_name: str, column_name: str, expression: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - + def add_calculated_column( + self, + table_name: str, + column_name: str, + expression: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): """ Adds a calculated column to a table within a semantic model. 
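[Illustrative sketch of the data_type / summarize_by normalization shown above: shorthand or lower-case values such as 'integer' and 'avg' are coerced to the TOM enum spellings 'Int64' and 'Average'. The table and column names are hypothetical.]

    tom.add_data_column(
        table_name="Sales",
        column_name="Order Quantity",
        source_column="OrderQty",
        data_type="integer",   # normalized to 'Int64' by the wrapper
        summarize_by="avg",    # normalized to 'Average' by the wrapper
    )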
@@ -440,10 +492,18 @@ def add_calculated_column(self, table_name: str, column_name: str, expression: s """ - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) obj = TOM.CalculatedColumn() obj.Name = column_name @@ -462,8 +522,15 @@ def add_calculated_column(self, table_name: str, column_name: str, expression: s obj.DataCategory = data_category self.model.Tables[table_name].Columns.Add(obj) - def add_calculation_item(self, table_name: str, calculation_item_name: str, expression: str, ordinal: Optional[int] = None, format_string_expression: Optional[str] = None, description: Optional[str] = None): - + def add_calculation_item( + self, + table_name: str, + calculation_item_name: str, + expression: str, + ordinal: Optional[int] = None, + format_string_expression: Optional[str] = None, + description: Optional[str] = None, + ): """ Adds a calculation item to a calculation group within a semantic model. @@ -499,8 +566,12 @@ def add_calculation_item(self, table_name: str, calculation_item_name: str, expr obj.FormatStringDefinition = fsd.Expression = format_string_expression self.model.Tables[table_name].CalculationGroup.CalculationItems.Add(obj) - def add_role(self, role_name: str, model_permission: Optional[str] = None, description: Optional[str] = None): - + def add_role( + self, + role_name: str, + model_permission: Optional[str] = None, + description: Optional[str] = None, + ): """ Adds a role to a semantic model. @@ -520,17 +591,18 @@ def add_role(self, role_name: str, model_permission: Optional[str] = None, descr """ if model_permission is None: - model_permission = 'Read' + model_permission = "Read" obj = TOM.ModelRole() obj.Name = role_name - obj.ModelPermission = System.Enum.Parse(TOM.ModelPermission, model_permission) + obj.ModelPermission = System.Enum.Parse( + TOM.ModelPermission, model_permission + ) if description is not None: obj.Description = description self.model.Roles.Add(obj) def set_rls(self, role_name: str, table_name: str, filter_expression: str): - """ Sets the row level security permissions for a table within a role. @@ -553,12 +625,15 @@ def set_rls(self, role_name: str, table_name: str, filter_expression: str): tp.FilterExpression = filter_expression try: - self.model.Roles[role_name].TablePermissions[table_name].FilterExpression = filter_expression + self.model.Roles[role_name].TablePermissions[ + table_name + ].FilterExpression = filter_expression except: self.model.Roles[role_name].TablePermissions.Add(tp) - def set_ols(self, role_name: str, table_name: str, column_name: str, permission: str): - + def set_ols( + self, role_name: str, table_name: str, column_name: str, permission: str + ): """ Sets the object level security permissions for a column within a role. @@ -580,20 +655,35 @@ def set_ols(self, role_name: str, table_name: str, column_name: str, permission: permission = permission.capitalize() - if permission not in ['Read', 'None', 'Default']: + if permission not in ["Read", "None", "Default"]: print(f"ERROR! 
Invalid 'permission' value.") return cp = TOM.ColumnPermission() cp.Column = self.model.Tables[table_name].Columns[column_name] - cp.MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + cp.MetadataPermission = System.Enum.Parse( + TOM.MetadataPermission, permission + ) try: - self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions[column_name].MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + self.model.Roles[role_name].TablePermissions[ + table_name + ].ColumnPermissions[column_name].MetadataPermission = System.Enum.Parse( + TOM.MetadataPermission, permission + ) except: - self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions.Add(cp) - - def add_hierarchy(self, table_name: str, hierarchy_name: str, columns: List[str], levels: Optional[List[str]] = None, hierarchy_description: Optional[str] = None, hierarchy_hidden: Optional[bool] = False): - + self.model.Roles[role_name].TablePermissions[ + table_name + ].ColumnPermissions.Add(cp) + + def add_hierarchy( + self, + table_name: str, + hierarchy_name: str, + columns: List[str], + levels: Optional[List[str]] = None, + hierarchy_description: Optional[str] = None, + hierarchy_hidden: Optional[bool] = False, + ): """ Adds a hierarchy to a table within a semantic model. @@ -618,19 +708,25 @@ def add_hierarchy(self, table_name: str, hierarchy_name: str, columns: List[str] """ if isinstance(columns, str): - print(f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']") + print( + f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']" + ) return if len(columns) == 1: - print(f"There must be at least 2 levels in order to create a hierarchy.") + print( + f"There must be at least 2 levels in order to create a hierarchy." + ) return - + if levels is None: levels = columns - + if len(columns) != len(levels): - print(f"If specifying level names, you must specify a level for each column.") + print( + f"If specifying level names, you must specify a level for each column." + ) return - + obj = TOM.Hierarchy() obj.Name = hierarchy_name obj.IsHidden = hierarchy_hidden @@ -643,10 +739,23 @@ def add_hierarchy(self, table_name: str, hierarchy_name: str, columns: List[str] lvl.Column = self.model.Tables[table_name].Columns[col] lvl.Name = levels[columns.index(col)] lvl.Ordinal = columns.index(col) - self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add(lvl) - - def add_relationship(self, from_table: str, from_column: str, to_table: str, to_column: str, from_cardinality: str, to_cardinality: str, cross_filtering_behavior: Optional[str] = None, is_active: Optional[bool] = True, security_filtering_behavior: Optional[str] = None, rely_on_referential_integrity: Optional[bool] = False): - + self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add( + lvl + ) + + def add_relationship( + self, + from_table: str, + from_column: str, + to_table: str, + to_column: str, + from_cardinality: str, + to_cardinality: str, + cross_filtering_behavior: Optional[str] = None, + is_active: Optional[bool] = True, + security_filtering_behavior: Optional[str] = None, + rely_on_referential_integrity: Optional[bool] = False, + ): """ Adds a relationship to a semantic model. @@ -670,7 +779,7 @@ def add_relationship(self, from_table: str, from_column: str, to_table: str, to_ is_active : bool, default=True Setting for whether the relationship is active or not. 
security_filtering_behavior : str, default=None - Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). + Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). Defaults to None which resolves to 'OneDirection'. rely_on_referential_integrity : bool, default=False Setting for the rely on referential integrity of the relationship. @@ -681,31 +790,48 @@ def add_relationship(self, from_table: str, from_column: str, to_table: str, to_ """ if cross_filtering_behavior is None: - cross_filtering_behavior = 'Automatic' + cross_filtering_behavior = "Automatic" if security_filtering_behavior is None: - security_filtering_behavior = 'OneDirection' + security_filtering_behavior = "OneDirection" from_cardinality = from_cardinality.capitalize() to_cardinality = to_cardinality.capitalize() cross_filtering_behavior = cross_filtering_behavior.capitalize() security_filtering_behavior = security_filtering_behavior.capitalize() - security_filtering_behavior = security_filtering_behavior.replace('direct', 'Direct') - cross_filtering_behavior = cross_filtering_behavior.replace('direct', 'Direct') + security_filtering_behavior = security_filtering_behavior.replace( + "direct", "Direct" + ) + cross_filtering_behavior = cross_filtering_behavior.replace( + "direct", "Direct" + ) rel = TOM.SingleColumnRelationship() rel.FromColumn = self.model.Tables[from_table].Columns[from_column] - rel.FromCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, from_cardinality) + rel.FromCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, from_cardinality + ) rel.ToColumn = self.model.Tables[to_table].Columns[to_column] - rel.ToCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, to_cardinality) + rel.ToCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, to_cardinality + ) rel.IsActive = is_active - rel.CrossFilteringBehavior = System.Enum.Parse(TOM.CrossFilteringBehavior, cross_filtering_behavior) - rel.SecurityFilteringBehavior = System.Enum.Parse(TOM.SecurityFilteringBehavior, security_filtering_behavior) + rel.CrossFilteringBehavior = System.Enum.Parse( + TOM.CrossFilteringBehavior, cross_filtering_behavior + ) + rel.SecurityFilteringBehavior = System.Enum.Parse( + TOM.SecurityFilteringBehavior, security_filtering_behavior + ) rel.RelyOnReferentialIntegrity = rely_on_referential_integrity self.model.Relationships.Add(rel) - def add_calculation_group(self, name: str, precedence: int, description: Optional[str] = None, hidden: Optional[bool] = False): - + def add_calculation_group( + self, + name: str, + precedence: int, + description: Optional[str] = None, + hidden: Optional[bool] = False, + ): """ Adds a calculation group to a semantic model. @@ -718,7 +844,7 @@ def add_calculation_group(self, name: str, precedence: int, description: Optiona description : str, default=None A description of the calculation group. hidden : bool, default=False - Whether the calculation group is hidden/visible. + Whether the calculation group is hidden/visible. 
Returns ------- @@ -738,28 +864,29 @@ def add_calculation_group(self, name: str, precedence: int, description: Optiona part.Source = TOM.CalculationGroupSource() tbl.Partitions.Add(part) - sortCol = 'Ordinal' + sortCol = "Ordinal" col1 = TOM.DataColumn() col1.Name = sortCol col1.SourceColumn = sortCol col1.IsHidden = True - col1.DataType = System.Enum.Parse(TOM.DataType, 'Int64') + col1.DataType = System.Enum.Parse(TOM.DataType, "Int64") tbl.Columns.Add(col1) col2 = TOM.DataColumn() - col2.Name = 'Name' - col2.SourceColumn = 'Name' - col2.DataType = System.Enum.Parse(TOM.DataType, 'String') - #col.SortByColumn = m.Tables[name].Columns[sortCol] + col2.Name = "Name" + col2.SourceColumn = "Name" + col2.DataType = System.Enum.Parse(TOM.DataType, "String") + # col.SortByColumn = m.Tables[name].Columns[sortCol] tbl.Columns.Add(col2) self.model.DiscourageImplicitMeasures = True self.model.Tables.Add(tbl) - def add_expression(self, name: str, expression: str, description: Optional[str] = None): - + def add_expression( + self, name: str, expression: str, description: Optional[str] = None + ): """ Adds an expression to a semantic model. @@ -770,7 +897,7 @@ def add_expression(self, name: str, expression: str, description: Optional[str] expression: str The M expression of the expression. description : str, default=None - A description of the expression. + A description of the expression. Returns ------- @@ -787,7 +914,6 @@ def add_expression(self, name: str, expression: str, description: Optional[str] self.model.Expressions.Add(exp) def add_translation(self, language: str): - """ Adds a translation language (culture) to a semantic model. @@ -810,7 +936,6 @@ def add_translation(self, language: str): pass def add_perspective(self, perspective_name: str): - """ Adds a perspective to a semantic model. @@ -828,8 +953,14 @@ def add_perspective(self, perspective_name: str): persp.Name = perspective_name self.model.Perspectives.Add(persp) - def add_m_partition(self, table_name: str, partition_name: str, expression: str, mode: Optional[str] = None, description: Optional[str] = None): - + def add_m_partition( + self, + table_name: str, + partition_name: str, + expression: str, + mode: Optional[str] = None, + description: Optional[str] = None, + ): """ Adds an M-partition to a table within a semantic model. @@ -846,13 +977,18 @@ def add_m_partition(self, table_name: str, partition_name: str, expression: str, Defaults to None which resolves to 'Import'. description : str, default=None A description for the partition. - + Returns ------- """ - mode = mode.title().replace('query', 'Query').replace(' ','').replace('lake', 'Lake') + mode = ( + mode.title() + .replace("query", "Query") + .replace(" ", "") + .replace("lake", "Lake") + ) mp = TOM.MPartitionSource() mp.Expression = expression @@ -862,13 +998,18 @@ def add_m_partition(self, table_name: str, partition_name: str, expression: str, if description is not None: p.Description = description if mode is None: - mode = 'Default' + mode = "Default" p.Mode = System.Enum.Parse(TOM.ModeType, mode) self.model.Tables[table_name].Partitions.Add(p) - def add_entity_partition(self, table_name: str, entity_name: str, expression: Optional[str] = None, description: Optional[str] = None): - + def add_entity_partition( + self, + table_name: str, + entity_name: str, + expression: Optional[str] = None, + description: Optional[str] = None, + ): """ Adds an entity partition to a table within a semantic model. 
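[Illustrative sketch combining the calculation-group helpers reformatted above (add_calculation_group together with the earlier add_calculation_item); the group name, precedence, and DAX expression are hypothetical.]

    tom.add_calculation_group(name="Time Intelligence", precedence=1)
    tom.add_calculation_item(
        table_name="Time Intelligence",
        calculation_item_name="YTD",
        expression="CALCULATE(SELECTEDMEASURE(), DATESYTD('Date'[Date]))",
        ordinal=0,
    )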
@@ -883,7 +1024,7 @@ def add_entity_partition(self, table_name: str, entity_name: str, expression: Op Defaults to None which resolves to the 'DatabaseQuery' expression. description : str, default=None A description for the partition. - + Returns ------- @@ -893,7 +1034,7 @@ def add_entity_partition(self, table_name: str, entity_name: str, expression: Op ep.Name = table_name ep.EntityName = entity_name if expression is None: - ep.ExpressionSource = self.model.Expressions['DatabaseQuery'] + ep.ExpressionSource = self.model.Expressions["DatabaseQuery"] else: ep.ExpressionSource = expression p = TOM.Partition() @@ -902,11 +1043,17 @@ def add_entity_partition(self, table_name: str, entity_name: str, expression: Op p.Mode = TOM.ModeType.DirectLake if description is not None: p.Description = description - - self.model.Tables[table_name].Partitions.Add(p) - def set_alternate_of(self, table_name: str, column_name: str, summarization_type: str, base_table: str, base_column: Optional[str] = None): + self.model.Tables[table_name].Partitions.Add(p) + def set_alternate_of( + self, + table_name: str, + column_name: str, + summarization_type: str, + base_table: str, + base_column: Optional[str] = None, + ): """ Sets the 'alternate of' property on a column. @@ -922,24 +1069,34 @@ def set_alternate_of(self, table_name: str, column_name: str, summarization_type Name of the base table for aggregation. base_column : str Name of the base column for aggregation - + Returns ------- """ - - if base_column is not None and base_table is None: - print(f"ERROR: If you specify the base table you must also specify the base column") - summarization_type = summarization_type.replace(' ','').capitalize().replace('Groupby', 'GroupBy') + if base_column is not None and base_table is None: + print( + f"ERROR: If you specify the base table you must also specify the base column" + ) + + summarization_type = ( + summarization_type.replace(" ", "") + .capitalize() + .replace("Groupby", "GroupBy") + ) - summarizationTypes = ['Sum', 'GroupBy', 'Count', 'Min', 'Max'] + summarizationTypes = ["Sum", "GroupBy", "Count", "Min", "Max"] if summarization_type not in summarizationTypes: - print(f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}.") + print( + f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}." + ) return ao = TOM.AlternateOf() - ao.Summarization = System.Enum.Parse(TOM.SummarizationType, summarization_type) + ao.Summarization = System.Enum.Parse( + TOM.SummarizationType, summarization_type + ) if base_column is not None: ao.BaseColumn = self.model.Tables[base_table].Columns[base_column] else: @@ -954,7 +1111,6 @@ def set_alternate_of(self, table_name: str, column_name: str, summarization_type c.IsHidden = True def remove_alternate_of(self, table_name: str, column_name: str): - """ Removes the 'alternate of' property on a column. @@ -964,7 +1120,7 @@ def remove_alternate_of(self, table_name: str, column_name: str): Name of the table. column_name : str Name of the column. - + Returns ------- @@ -972,8 +1128,9 @@ def remove_alternate_of(self, table_name: str, column_name: str): self.model.Tables[table_name].Columns[column_name].AlternateOf = None - def get_annotations(self, object) -> 'Microsoft.AnalysisServices.Tabular.Annotation': - + def get_annotations( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.Annotation": """ Shows all annotations for a given object within a semantic model. 
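[Illustrative sketch of set_alternate_of as reformatted above, mapping an aggregation-table column onto its base column; the aggregation and base table names are hypothetical.]

    tom.set_alternate_of(
        table_name="Sales_Agg",
        column_name="SalesAmount",
        summarization_type="Sum",
        base_table="Sales",
        base_column="SalesAmount",
    )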
@@ -981,22 +1138,21 @@ def get_annotations(self, object) -> 'Microsoft.AnalysisServices.Tabular.Annotat ---------- object : TOM Object An object (i.e. table/column/measure) within a semantic model. - + Returns ------- Microsoft.AnalysisServices.Tabular.Annotation TOM objects of all the annotations on a particular object within the semantic model. """ - #df = pd.DataFrame(columns=['Name', 'Value']) + # df = pd.DataFrame(columns=['Name', 'Value']) for a in object.Annotations: - #new_data = {'Name': a.Name, 'Value': a.Value} + # new_data = {'Name': a.Name, 'Value': a.Value} yield a - #df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - def set_annotation(self, object, name: str, value: str): + # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + def set_annotation(self, object, name: str, value: str): """ Sets an annotation on an object within the semantic model. @@ -1008,7 +1164,7 @@ def set_annotation(self, object, name: str, value: str): Name of the annotation. value : str Value of the annotation. - + Returns ------- @@ -1024,7 +1180,6 @@ def set_annotation(self, object, name: str, value: str): object.Annotations.Add(ann) def get_annotation_value(self, object, name: str): - """ Obtains the annotation value for a given annotation on an object within the semantic model. @@ -1034,7 +1189,7 @@ def get_annotation_value(self, object, name: str): An object (i.e. table/column/measure) within a semantic model. name : str Name of the annotation. - + Returns ------- str @@ -1044,7 +1199,6 @@ def get_annotation_value(self, object, name: str): return object.Annotations[name].Value def remove_annotation(self, object, name: str): - """ Removes an annotation on an object within the semantic model. @@ -1054,7 +1208,7 @@ def remove_annotation(self, object, name: str): An object (i.e. table/column/measure) within a semantic model. name : str Name of the annotation. - + Returns ------- @@ -1063,7 +1217,6 @@ def remove_annotation(self, object, name: str): object.Annotations.Remove(name) def clear_annotations(self, object): - """ Removes all annotations on an object within the semantic model. @@ -1071,7 +1224,7 @@ def clear_annotations(self, object): ---------- object : TOM Object An object (i.e. table/column/measure) within a semantic model. - + Returns ------- @@ -1079,8 +1232,9 @@ def clear_annotations(self, object): object.Annotations.Clear() - def get_extended_properties(self, object) -> 'Microsoft.AnalysisServices.Tabular.ExtendedProperty': - + def get_extended_properties( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.ExtendedProperty": """ Retrieves all extended properties on an object within the semantic model. @@ -1088,24 +1242,25 @@ def get_extended_properties(self, object) -> 'Microsoft.AnalysisServices.Tabular ---------- object : TOM Object An object (i.e. table/column/measure) within a semantic model. - + Returns ------- Microsoft.AnalysisServices.Tabular.ExtendedPropertiesCollection TOM Objects of all the extended properties. 
""" - #df = pd.DataFrame(columns=['Name', 'Value', 'Type']) + # df = pd.DataFrame(columns=['Name', 'Value', 'Type']) for a in object.ExtendedProperties: yield a - #new_data = {'Name': a.Name, 'Value': a.Value, 'Type': a.Type} - #df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + # new_data = {'Name': a.Name, 'Value': a.Value, 'Type': a.Type} + # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - #return df - - def set_extended_property(self, object, extended_property_type: str, name: str, value: str): + # return df + def set_extended_property( + self, object, extended_property_type: str, name: str, value: str + ): """ Sets an extended property on an object within the semantic model. @@ -1119,7 +1274,7 @@ def set_extended_property(self, object, extended_property_type: str, name: str, Name of the extended property. value : str Value of the extended property. - + Returns ------- @@ -1127,7 +1282,7 @@ def set_extended_property(self, object, extended_property_type: str, name: str, extended_property_type = extended_property_type.title() - if extended_property_type == 'Json': + if extended_property_type == "Json": ep = TOM.JsonExtendedProperty() else: ep = TOM.StringExtendedProperty() @@ -1141,7 +1296,6 @@ def set_extended_property(self, object, extended_property_type: str, name: str, object.ExtendedProperties.Add(ep) def get_extended_property_value(self, object, name: str): - """ Retrieves the value of an extended property for an object within the semantic model. @@ -1151,7 +1305,7 @@ def get_extended_property_value(self, object, name: str): An object (i.e. table/column/measure) within a semantic model. name : str Name of the annotation. - + Returns ------- str @@ -1161,7 +1315,6 @@ def get_extended_property_value(self, object, name: str): return object.ExtendedProperties[name].Value def remove_extended_property(self, object, name: str): - """ Removes an extended property on an object within the semantic model. @@ -1171,7 +1324,7 @@ def remove_extended_property(self, object, name: str): An object (i.e. table/column/measure) within a semantic model. name : str Name of the annotation. - + Returns ------- @@ -1180,7 +1333,6 @@ def remove_extended_property(self, object, name: str): object.ExtendedProperties.Remove(name) def clear_extended_properties(self, object): - """ Removes all extended properties on an object within the semantic model. @@ -1188,16 +1340,19 @@ def clear_extended_properties(self, object): ---------- object : TOM Object An object (i.e. table/column/measure) within a semantic model. - + Returns ------- """ object.ExtendedProperties.Clear() - - def in_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - + + def in_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): """ Indicates whether an object is contained within a given perspective. @@ -1207,37 +1362,55 @@ def in_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', An object (i.e. table/column/measure) within a semantic model. perspecitve_name : str Name of the perspective. - + Returns ------- bool An indication as to whether the object is contained within the given perspective. 
""" - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] objectType = object.ObjectType if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") + print( + f"Only the following object types are valid for perspectives: {validObjects}." + ) return - + object.Model.Perspectives[perspective_name] - try: + try: if objectType == TOM.ObjectType.Table: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Name] + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] elif objectType == TOM.ObjectType.Column: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns[object.Name] + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns[object.Name] elif objectType == TOM.ObjectType.Measure: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures[object.Name] + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures[object.Name] elif objectType == TOM.ObjectType.Hierarchy: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies[object.Name] + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies[object.Name] return True except: return False - def add_to_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - + def add_to_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): """ Adds an object to a perspective. @@ -1247,17 +1420,24 @@ def add_to_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measu An object (i.e. table/column/measure) within a semantic model. perspective_name : str Name of the perspective. - + Returns ------- """ - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] objectType = object.ObjectType if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") + print( + f"Only the following object types are valid for perspectives: {validObjects}." 
+ ) return try: object.Model.Perspectives[perspective_name] @@ -1265,7 +1445,7 @@ def add_to_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measu print(f"The '{perspective_name}' perspective does not exist.") return - #try: + # try: if objectType == TOM.ObjectType.Table: pt = TOM.PerspectiveTable() pt.Table = object @@ -1273,20 +1453,29 @@ def add_to_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measu elif objectType == TOM.ObjectType.Column: pc = TOM.PerspectiveColumn() pc.Column = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns.Add(pc) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Add(pc) elif objectType == TOM.ObjectType.Measure: pm = TOM.PerspectiveMeasure() pm.Measure = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures.Add(pm) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Add(pm) elif objectType == TOM.ObjectType.Hierarchy: ph = TOM.PerspectiveHierarchy() ph.Hierarchy = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies.Add(ph) - #except: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Add(ph) + # except: # pass - def remove_from_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - + def remove_from_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): """ Removes an object from a perspective. @@ -1296,17 +1485,24 @@ def remove_from_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM. An object (i.e. table/column/measure) within a semantic model. perspective_name : str Name of the perspective. - + Returns ------- """ - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] objectType = object.ObjectType if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") + print( + f"Only the following object types are valid for perspectives: {validObjects}." + ) return try: object.Model.Perspectives[perspective_name] @@ -1314,24 +1510,49 @@ def remove_from_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM. 
print(f"The '{perspective_name}' perspective does not exist.") return - #try: + # try: if objectType == TOM.ObjectType.Table: - pt = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Name] + pt = object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] object.Model.Perspectives[perspective_name].PerspectiveTables.Remove(pt) elif objectType == TOM.ObjectType.Column: - pc = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns.Remove(pc) + pc = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveColumns[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Remove(pc) elif objectType == TOM.ObjectType.Measure: - pm = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures.Remove(pm) + pm = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveMeasures[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Remove(pm) elif objectType == TOM.ObjectType.Hierarchy: - ph = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies.Remove(ph) - #except: + ph = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveHierarchies[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Remove(ph) + # except: # pass - def set_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], language: str, property: str, value: str): - + def set_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + property: str, + value: str, + ): """ Sets a translation value for an object's property. @@ -1345,26 +1566,31 @@ def set_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure' The property to set. Options: 'Name', 'Description', 'Display Folder'. value : str The transation value. 
- + Returns ------- """ - self.add_translation(language = language) + self.add_translation(language=language) property = property.title() - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] #, 'Level' + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] # , 'Level' if object.ObjectType not in validObjects: print(f"Translations can only be set to {validObjects}.") return mapping = { - 'Name': TOM.TranslatedProperty.Caption, - 'Description': TOM.TranslatedProperty.Description, - 'Display Folder': TOM.TranslatedProperty.DisplayFolder + "Name": TOM.TranslatedProperty.Caption, + "Description": TOM.TranslatedProperty.Description, + "Display Folder": TOM.TranslatedProperty.DisplayFolder, } prop = mapping.get(property) @@ -1372,14 +1598,20 @@ def set_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure' try: object.Model.Cultures[language] except: - print(f"The '{language}' translation language does not exist in the semantic model.") + print( + f"The '{language}' translation language does not exist in the semantic model." + ) return - object.Model.Cultures[language].ObjectTranslations.SetTranslation(object, prop, value) - - - def remove_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], language: str): + object.Model.Cultures[language].ObjectTranslations.SetTranslation( + object, prop, value + ) + def remove_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + ): """ Removes an object's translation value. @@ -1389,17 +1621,18 @@ def remove_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measu An object (i.e. table/column/measure) within a semantic model. language : str The language code. - + Returns ------- """ - o = object.Model.Cultures[language].ObjectTranslations[object, TOM.TranslatedProperty.Caption] + o = object.Model.Cultures[language].ObjectTranslations[ + object, TOM.TranslatedProperty.Caption + ] object.Model.Cultures[language].ObjectTranslations.Remove(o) def remove_object(self, object): - """ Removes an object from a semantic model. @@ -1407,7 +1640,7 @@ def remove_object(self, object): ---------- object : TOM Object An object (i.e. table/column/measure) within a semantic model. - + Returns ------- @@ -1416,16 +1649,18 @@ def remove_object(self, object): objType = object.ObjectType # Have to remove translations and perspectives on the object before removing it. 
- if objType in ['Table', 'Column', 'Measure', 'Hierarchy', 'Level']: + if objType in ["Table", "Column", "Measure", "Hierarchy", "Level"]: for lang in object.Model.Cultures: try: - self.remove_translation(object = object, language = lang.Name) + self.remove_translation(object=object, language=lang.Name) except: pass - if objType in ['Table', 'Column', 'Measure', 'Hierarchy']: + if objType in ["Table", "Column", "Measure", "Hierarchy"]: for persp in object.Model.Perspectives: try: - self.remove_from_perspective(object = object, perspective_name = persp.Name) + self.remove_from_perspective( + object=object, perspective_name=persp.Name + ) except: pass @@ -1456,8 +1691,7 @@ def remove_object(self, object): elif objType == TOM.ObjectType.TablePermission: object.Parent.TablePermissions.Remove(object.Name) - def used_in_relationships(self, object: Union['TOM.Table', 'TOM.Column']): - + def used_in_relationships(self, object: Union["TOM.Table", "TOM.Column"]): """ Shows all relationships in which a table/column is used. @@ -1465,7 +1699,7 @@ def used_in_relationships(self, object: Union['TOM.Table', 'TOM.Column']): ---------- object : TOM Object An object (i.e. table/column) within a semantic model. - + Returns ------- Microsoft.AnalysisServices.Tabular.RelationshipCollection @@ -1477,15 +1711,19 @@ def used_in_relationships(self, object: Union['TOM.Table', 'TOM.Column']): if objType == TOM.ObjectType.Table: for r in self.model.Relationships: if r.FromTable.Name == object.Name or r.ToTable.Name == object.Name: - yield r#, 'Table' + yield r # , 'Table' elif objType == TOM.ObjectType.Column: for r in self.model.Relationships: - if (r.FromTable.Name == object.Parent.Name and r.FromColumn.Name == object.Name) or \ - (r.ToTable.Name == object.Parent.Name and r.ToColumn.Name == object.Name): - yield r#, 'Column' - - def used_in_levels(self, column: 'TOM.Column'): + if ( + r.FromTable.Name == object.Parent.Name + and r.FromColumn.Name == object.Name + ) or ( + r.ToTable.Name == object.Parent.Name + and r.ToColumn.Name == object.Name + ): + yield r # , 'Column' + def used_in_levels(self, column: "TOM.Column"): """ Shows all levels in which a column is used. @@ -1493,7 +1731,7 @@ def used_in_levels(self, column: 'TOM.Column'): ---------- object : TOM Object An column object within a semantic model. - + Returns ------- Microsoft.AnalysisServices.Tabular.LevelCollection @@ -1504,11 +1742,13 @@ def used_in_levels(self, column: 'TOM.Column'): if objType == TOM.ObjectType.Column: for l in self.all_levels(): - if l.Parent.Table.Name == column.Parent.Name and l.Column.Name == column.Name: + if ( + l.Parent.Table.Name == column.Parent.Name + and l.Column.Name == column.Name + ): yield l - - def used_in_hierarchies(self, column: 'TOM.Column'): + def used_in_hierarchies(self, column: "TOM.Column"): """ Shows all hierarchies in which a column is used. @@ -1516,7 +1756,7 @@ def used_in_hierarchies(self, column: 'TOM.Column'): ---------- object : TOM Object An column object within a semantic model. 
- + Returns ------- Microsoft.AnalysisServices.Tabular.HierarchyCollection @@ -1527,11 +1767,13 @@ def used_in_hierarchies(self, column: 'TOM.Column'): if objType == TOM.ObjectType.Column: for l in self.all_levels(): - if l.Parent.Table.Name == column.Parent.Name and l.Column.Name == column.Name: + if ( + l.Parent.Table.Name == column.Parent.Name + and l.Column.Name == column.Name + ): yield l.Parent - def used_in_sort_by(self, column: 'TOM.Column'): - + def used_in_sort_by(self, column: "TOM.Column"): """ Shows all columns in which a column is used for sorting. @@ -1539,7 +1781,7 @@ def used_in_sort_by(self, column: 'TOM.Column'): ---------- object : TOM Object An column object within a semantic model. - + Returns ------- Microsoft.AnalysisServices.Tabular.ColumnCollection @@ -1553,8 +1795,11 @@ def used_in_sort_by(self, column: 'TOM.Column'): if c.SortByColumn == column: yield c - def used_in_rls(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): - + def used_in_rls( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): """ Identifies the filter expressions which reference a given object. @@ -1564,38 +1809,52 @@ def used_in_rls(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], d An object (i.e. table/column) within a semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - + """ objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Rows Allowed'] + + df_filt = dependencies[dependencies["Object Type"] == "Rows Allowed"] if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() for t in self.model.Tables: if t.Name in tbls: yield t elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() for c in self.all_columns(): if format_dax_object_name(c.Parent.Name, c.Name) in cols: yield c elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() for m in self.all_measures(): if m.Name in meas: yield m - def used_in_data_coverage_definition(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): - + def used_in_data_coverage_definition( + self, + object: 
Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): """ Identifies the ... which reference a given object. @@ -1605,38 +1864,54 @@ def used_in_data_coverage_definition(self, object: Union['TOM.Table', 'TOM.Colum An object (i.e. table/column) within a semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - + """ objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Data Coverage Definition'] + + df_filt = dependencies[ + dependencies["Object Type"] == "Data Coverage Definition" + ] if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() for t in self.model.Tables: if t.Name in tbls: yield t elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() for c in self.all_columns(): if format_dax_object_name(c.Parent.Name, c.Name) in cols: yield c elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() for m in self.all_measures(): if m.Name in meas: yield m - - def used_in_calc_item(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): + def used_in_calc_item( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): """ Identifies the ... which reference a given object. @@ -1646,44 +1921,54 @@ def used_in_calc_item(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measur An object (i.e. table/column) within a semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
- + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - + """ objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Calculation Item'] + + df_filt = dependencies[dependencies["Object Type"] == "Calculation Item"] if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() for t in self.model.Tables: if t.Name in tbls: yield t elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() for c in self.all_columns(): if format_dax_object_name(c.Parent.Name, c.Name) in cols: yield c elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() for m in self.all_measures(): if m.Name in meas: yield m def hybrid_tables(self): - """ Outputs the hybrid tables within a semantic model. Parameters ---------- - + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection @@ -1696,13 +1981,12 @@ def hybrid_tables(self): yield t def date_tables(self): - """ Outputs the tables which are marked as date tables within a semantic model. Parameters ---------- - + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection @@ -1710,12 +1994,14 @@ def date_tables(self): """ for t in self.model.Tables: - if t.DataCategory == 'Time': - if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): + if t.DataCategory == "Time": + if any( + c.IsKey and c.DataType == TOM.DataType.DateTime + for c in t.Columns + ): yield t def is_hybrid_table(self, table_name: str): - """ Identifies if a table is a hybrid table. @@ -1723,7 +2009,7 @@ def is_hybrid_table(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -1732,14 +2018,19 @@ def is_hybrid_table(self, table_name: str): isHybridTable = False - if any(p.Mode == TOM.ModeType.Import for p in self.model.Tables[table_name].Partitions): - if any(p.Mode == TOM.ModeType.DirectQuery for p in self.model.Tables[table_name].Partitions): + if any( + p.Mode == TOM.ModeType.Import + for p in self.model.Tables[table_name].Partitions + ): + if any( + p.Mode == TOM.ModeType.DirectQuery + for p in self.model.Tables[table_name].Partitions + ): isHybridTable = True return isHybridTable def is_date_table(self, table_name: str): - """ Identifies if a table is marked as a date table. 
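# Usage sketch (illustrative only) for the lookup helpers and table checks reformatted above.
# used_in_relationships()/used_in_rls() and the is_hybrid_table()/is_date_table() checks are
# taken from the signatures in this patch; the connect-style context manager, the import paths,
# and the dataset/workspace/table/column names are assumptions made for the example.
from sempy_labs import get_model_calc_dependencies   # import path assumed
from sempy_labs.tom import connect_semantic_model    # import path assumed

# Dependency DataFrame expected by used_in_rls (see the docstrings above).
dependencies = get_model_calc_dependencies(dataset="AdventureWorks", workspace="My Workspace")

with connect_semantic_model(dataset="AdventureWorks", workspace="My Workspace") as tom:
    region = tom.model.Tables["Sales"].Columns["Region"]

    # Relationships that use 'Sales'[Region] on either side.
    for rel in tom.used_in_relationships(object=region):
        print(rel.Name)

    # TOM objects surfaced by used_in_rls for that column, based on the dependency DataFrame.
    for obj in tom.used_in_rls(object=region, dependencies=dependencies):
        print(obj.Name)

    # Partition- and date-table checks.
    print(tom.is_hybrid_table(table_name="Sales"))
    print(tom.is_date_table(table_name="Date"))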
@@ -1747,7 +2038,7 @@ def is_date_table(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -1757,14 +2048,15 @@ def is_date_table(self, table_name: str): isDateTable = False t = self.model.Tables[table_name] - if t.DataCategory == 'Time': - if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): + if t.DataCategory == "Time": + if any( + c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns + ): isDateTable = True return isDateTable - - def mark_as_date_table(self, table_name: str, column_name: str): + def mark_as_date_table(self, table_name: str, column_name: str): """ Marks a table as a date table. @@ -1774,7 +2066,7 @@ def mark_as_date_table(self, table_name: str, column_name: str): Name of the table. column_name : str Name of the date column in the table. - + Returns ------- @@ -1783,9 +2075,11 @@ def mark_as_date_table(self, table_name: str, column_name: str): t = self.model.Tables[table_name] c = t.Columns[column_name] if c.DataType != TOM.DataType.DateTime: - print(f"{red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type.") + print( + f"{icons.red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type." + ) return - + daxQuery = f""" define measure '{table_name}'[test] = var mn = MIN('{table_name}'[{column_name}]) @@ -1799,25 +2093,30 @@ def mark_as_date_table(self, table_name: str, column_name: str): "1",[test] ) """ - df = fabric.evaluate_dax(dataset=dataset, workspace=workspace, dax_string = daxQuery) - value = df['1'].iloc[0] - if value != '1': - print(f"{red_dot} The '{column_name}' within the '{table_name}' table does not contain contiguous date values.") + df = fabric.evaluate_dax( + dataset=dataset, workspace=workspace, dax_string=daxQuery + ) + value = df["1"].iloc[0] + if value != "1": + print( + f"{icons.red_dot} The '{column_name}' within the '{table_name}' table does not contain contiguous date values." + ) return - + # Mark as a date table - t.DataCategory = 'Time' - c.Columns[column_name].IsKey = True - print(f"{green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key.") - - def has_aggs(self): + t.DataCategory = "Time" + c.Columns[column_name].IsKey = True + print( + f"{icons.green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key." + ) + def has_aggs(self): """ Identifies if a semantic model has any aggregations. Parameters ---------- - + Returns ------- bool @@ -1831,9 +2130,8 @@ def has_aggs(self): hasAggs = True return hasAggs - - def is_agg_table(self, table_name: str): + def is_agg_table(self, table_name: str): """ Identifies if a table has aggregations. @@ -1841,7 +2139,7 @@ def is_agg_table(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -1853,13 +2151,12 @@ def is_agg_table(self, table_name: str): return any(c.AlternateOf is not None for c in t.Columns) def has_hybrid_table(self): - """ Identifies if a semantic model has a hybrid table. 
Parameters ---------- - + Returns ------- bool @@ -1869,19 +2166,18 @@ def has_hybrid_table(self): hasHybridTable = False for t in self.model.Tables: - if self.is_hybrid_table(table_name = t.Name): + if self.is_hybrid_table(table_name=t.Name): hasHybridTable = True return hasHybridTable def has_date_table(self): - """ Identifies if a semantic model has a table marked as a date table. Parameters ---------- - + Returns ------- bool @@ -1891,29 +2187,31 @@ def has_date_table(self): hasDateTable = False for t in self.model.Tables: - if self.is_date_table(table_name = t.Name): + if self.is_date_table(table_name=t.Name): hasDateTable = True return hasDateTable def is_direct_lake(self): - """ Identifies if a semantic model is in Direct Lake mode. Parameters ---------- - + Returns ------- bool Indicates if the semantic model is in Direct Lake mode. """ - return any(p.Mode == TOM.ModeType.DirectLake for t in self.model.Tables for p in t.Partitions) + return any( + p.Mode == TOM.ModeType.DirectLake + for t in self.model.Tables + for p in t.Partitions + ) def is_field_parameter(self, table_name: str): - """ Identifies if a table is a field parameter. @@ -1921,7 +2219,7 @@ def is_field_parameter(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -1930,10 +2228,21 @@ def is_field_parameter(self, table_name: str): t = self.model.Tables[table_name] - return any(p.SourceType == TOM.PartitionSourceType.Calculated and 'NAMEOF(' in p.Source.Expression for p in t.Partitions) and all('[Value' in c.SourceColumn for c in t.Columns if c.Type != TOM.ColumnType.RowNumber) and t.Columns.Count == 4 - - def is_auto_date_table(self, table_name: str): + return ( + any( + p.SourceType == TOM.PartitionSourceType.Calculated + and "NAMEOF(" in p.Source.Expression + for p in t.Partitions + ) + and all( + "[Value" in c.SourceColumn + for c in t.Columns + if c.Type != TOM.ColumnType.RowNumber + ) + and t.Columns.Count == 4 + ) + def is_auto_date_table(self, table_name: str): """ Identifies if a table is an auto-date table. @@ -1941,7 +2250,7 @@ def is_auto_date_table(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -1952,14 +2261,28 @@ def is_auto_date_table(self, table_name: str): t = self.model.Tables[table_name] - if t.Name.startswith('LocalDateTable_') or t.Name.startswith('DateTableTemplate_'): - if any(p.SourceType == TOM.PartitionSourceType.Calculated for p in t.Partitions): + if t.Name.startswith("LocalDateTable_") or t.Name.startswith( + "DateTableTemplate_" + ): + if any( + p.SourceType == TOM.PartitionSourceType.Calculated + for p in t.Partitions + ): isAutoDate = True return isAutoDate - def set_kpi(self, measure_name: str, target: Union[int,float,str], lower_bound: float, upper_bound: float, lower_mid_bound: Optional[float] = None, upper_mid_bound: Optional[float] = None, status_type: Optional[str] = None, status_graphic: Optional[str] = None): - + def set_kpi( + self, + measure_name: str, + target: Union[int, float, str], + lower_bound: float, + upper_bound: float, + lower_mid_bound: Optional[float] = None, + upper_mid_bound: Optional[float] = None, + status_type: Optional[str] = None, + status_graphic: Optional[str] = None, + ): """ Sets the properties to add/update a KPI for a measure. @@ -1983,60 +2306,94 @@ def set_kpi(self, measure_name: str, target: Union[int,float,str], lower_bound: status_graphic : str, default=None The status graphic for the KPI. Defaults to 'Three Circles Colored'. 
- + Returns ------- """ - #https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md + # https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md if measure_name == target: - print(f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter.") + print( + f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter." + ) return if status_graphic is None: - status_graphic = 'Three Circles Colored' + status_graphic = "Three Circles Colored" - statusType = ['Linear', 'LinearReversed', 'Centered', 'CenteredReversed'] - status_type = status_type.title().replace(' ','') + statusType = ["Linear", "LinearReversed", "Centered", "CenteredReversed"] + status_type = status_type.title().replace(" ", "") if status_type is None: - status_type = 'Linear' + status_type = "Linear" if status_type not in statusType: - print(f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}.") + print( + f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}." + ) return - if status_type in ['Linear', 'LinearReversed']: + if status_type in ["Linear", "LinearReversed"]: if upper_bound is not None or lower_mid_bound is not None: - print(f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None.") + print( + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None." + ) return elif upper_bound <= lower_bound: print(f"The upper_bound must be greater than the lower_bound.") return - - if status_type in ['Centered', 'CenteredReversed']: + + if status_type in ["Centered", "CenteredReversed"]: if upper_mid_bound is None or lower_mid_bound is None: - print(f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types.") + print( + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types." + ) return elif upper_bound <= upper_mid_bound: print(f"The upper_bound must be greater than the upper_mid_bound.") elif upper_mid_bound <= lower_mid_bound: - print(f"The upper_mid_bound must be greater than the lower_mid_bound.") + print( + f"The upper_mid_bound must be greater than the lower_mid_bound." + ) elif lower_mid_bound <= lower_bound: print(f"The lower_mid_bound must be greater than the lower_bound.") try: - table_name = next(m.Parent.Name for m in self.all_measures() if m.Name == measure_name) + table_name = next( + m.Parent.Name for m in self.all_measures() if m.Name == measure_name + ) except: - print(f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'.") + print( + f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'." 
+ ) return - - graphics = ['Cylinder', 'Five Bars Colored', 'Five Boxes Colored', 'Gauge - Ascending', 'Gauge - Descending', 'Road Signs', 'Shapes', 'Standard Arrow', 'Three Circles Colored', 'Three Flags Colored', 'Three Stars Colored', 'Three Symbols Uncircled Colored', 'Traffic Light', 'Traffic Light - Single', 'Variance Arrow', 'Status Arrow - Ascending', 'Status Arrow - Descending'] + + graphics = [ + "Cylinder", + "Five Bars Colored", + "Five Boxes Colored", + "Gauge - Ascending", + "Gauge - Descending", + "Road Signs", + "Shapes", + "Standard Arrow", + "Three Circles Colored", + "Three Flags Colored", + "Three Stars Colored", + "Three Symbols Uncircled Colored", + "Traffic Light", + "Traffic Light - Single", + "Variance Arrow", + "Status Arrow - Ascending", + "Status Arrow - Descending", + ] if status_graphic not in graphics: - print(f"The '{status_graphic}' status graphic is not valid. Please choose from these options: {graphics}.") + print( + f"The '{status_graphic}' status graphic is not valid. Please choose from these options: {graphics}." + ) return measure_target = True @@ -2047,22 +2404,28 @@ def set_kpi(self, measure_name: str, target: Union[int,float,str], lower_bound: measure_target = False except: try: - tgt = next(format_dax_object_name(m.Parent.Name, m.Name) for m in self.all_measures() if m.Name == target) + tgt = next( + format_dax_object_name(m.Parent.Name, m.Name) + for m in self.all_measures() + if m.Name == target + ) except: - print(f"The '{target}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'.") + print( + f"The '{target}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'." + ) if measure_target: expr = f"var x = [{measure_name}]/[{target}]\nreturn" else: expr = f"var x = [{measure_name}\nreturn" - if status_type == 'Linear': + if status_type == "Linear": expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_bound},-1,\n\t\tif(x<{upper_bound},0,1)))" - elif status_type == 'LinearReversed': + elif status_type == "LinearReversed": expr = f"{expr}\nif(isblank(x),blank(),\nif(x<{lower_bound},1,\n\t\tif(x<{upper_bound},0,-1)))" - elif status_type == 'Centered': + elif status_type == "Centered": expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},-1,0),\n\t\t\tif(x<{upper_mid_bound},1,\n\t\t\t\tif(x<{upper_bound}0,-1))))" - elif status_type == 'CenteredReversed': + elif status_type == "CenteredReversed": expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},1,0),\n\t\t\tif(x<{upper_mid_bound},-1,\n\t\t\t\tif(x<{upper_bound}0,1))))" kpi = TOM.KPI() @@ -2079,7 +2442,6 @@ def set_kpi(self, measure_name: str, target: Union[int,float,str], lower_bound: ms.KPI = kpi def set_aggregations(self, table_name: str, agg_table_name: str): - """ Sets the aggregations (alternate of) for all the columns in an aggregation table based on a base table. @@ -2089,7 +2451,7 @@ def set_aggregations(self, table_name: str, agg_table_name: str): Name of the base table. agg_table_name : str Name of the aggregation table. 
- + Returns ------- @@ -2099,15 +2461,26 @@ def set_aggregations(self, table_name: str, agg_table_name: str): dataType = c.DataType - if dataType in [TOM.DataType.String, TOM.DataType.Boolean, TOM.DataType.DateTime]: - sumType = 'GroupBy' + if dataType in [ + TOM.DataType.String, + TOM.DataType.Boolean, + TOM.DataType.DateTime, + ]: + sumType = "GroupBy" else: - sumType = 'Sum' + sumType = "Sum" - self.set_alternate_of(table_name = agg_table_name, column_name = c.Name, base_table = table_name, base_column = c.Name, summarization_type = sumType) - - def set_is_available_in_mdx(self, table_name: str, column_name: str, value: Optional[bool] = False): + self.set_alternate_of( + table_name=agg_table_name, + column_name=c.Name, + base_table=table_name, + base_column=c.Name, + summarization_type=sumType, + ) + def set_is_available_in_mdx( + self, table_name: str, column_name: str, value: Optional[bool] = False + ): """ Sets the IsAvailableInMdx property on a column. @@ -2119,7 +2492,7 @@ def set_is_available_in_mdx(self, table_name: str, column_name: str, value: Opti Name of the column. value : bool, default=False The IsAvailableInMdx property value. - + Returns ------- @@ -2127,8 +2500,9 @@ def set_is_available_in_mdx(self, table_name: str, column_name: str, value: Opti self.model.Tables[table_name].Columns[column_name].IsAvailableInMdx = value - def set_summarize_by(self, table_name: str, column_name: str, value: Optional[str] = None): - + def set_summarize_by( + self, table_name: str, column_name: str, value: Optional[str] = None + ): """ Sets the SummarizeBy property on a column. @@ -2141,27 +2515,43 @@ def set_summarize_by(self, table_name: str, column_name: str, value: Optional[st value : bool, default=None The SummarizeBy property value. Defaults to none which resolves to 'Default'. - + Returns ------- """ - values = ['Default', 'None', 'Sum', 'Min', 'Max', 'Count', 'Average', 'DistinctCount'] - #https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby + values = [ + "Default", + "None", + "Sum", + "Min", + "Max", + "Count", + "Average", + "DistinctCount", + ] + # https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby if value is None: - value = 'Default' - value = value.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') + value = "Default" + value = ( + value.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) if value not in values: - print(f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}.") + print( + f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}." + ) return - self.model.Tables[table_name].Columns[column_name].SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, value) + self.model.Tables[table_name].Columns[column_name].SummarizeBy = ( + System.Enum.Parse(TOM.AggregateFunction, value) + ) def set_direct_lake_behavior(self, direct_lake_behavior: str): - """ Sets the Direct Lake Behavior property for a semantic model. @@ -2169,30 +2559,45 @@ def set_direct_lake_behavior(self, direct_lake_behavior: str): ---------- direct_lake_behavior : str The DirectLakeBehavior property value. 
- + Returns ------- """ direct_lake_behavior = direct_lake_behavior.capitalize() - if direct_lake_behavior.startswith('Auto'): - direct_lake_behavior = 'Automatic' - elif direct_lake_behavior.startswith('Directl') or direct_lake_behavior == 'Dl': - direct_lake_behavior = 'DirectLakeOnly' - elif direct_lake_behavior.startswith('Directq') or direct_lake_behavior == 'Dq': - direct_lake_behavior = 'DirectQueryOnly' - - dlValues = ['Automatic', 'DirectLakeOnly', 'DirectQueryOnly'] + if direct_lake_behavior.startswith("Auto"): + direct_lake_behavior = "Automatic" + elif ( + direct_lake_behavior.startswith("Directl") + or direct_lake_behavior == "Dl" + ): + direct_lake_behavior = "DirectLakeOnly" + elif ( + direct_lake_behavior.startswith("Directq") + or direct_lake_behavior == "Dq" + ): + direct_lake_behavior = "DirectQueryOnly" + + dlValues = ["Automatic", "DirectLakeOnly", "DirectQueryOnly"] if direct_lake_behavior not in dlValues: - print(f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}.") + print( + f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}." + ) return - self.model.DirectLakeBehavior = System.Enum.Parse(TOM.DirectLakeBehavior, direct_lake_behavior) - - def add_table(self, name: str, description: Optional[str] = None, data_category: Optional[str] = None, hidden: Optional[bool] = False): + self.model.DirectLakeBehavior = System.Enum.Parse( + TOM.DirectLakeBehavior, direct_lake_behavior + ) + def add_table( + self, + name: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): """ Adds a table to the semantic model. @@ -2206,7 +2611,7 @@ def add_table(self, name: str, description: Optional[str] = None, data_category: The data category for the table. hidden : bool, default=False Whether the table is hidden or visible. - + Returns ------- @@ -2221,8 +2626,14 @@ def add_table(self, name: str, description: Optional[str] = None, data_category: t.Hidden = hidden self.model.Tables.Add(t) - def add_calculated_table(self, name: str, expression: str, description: Optional[str] = None, data_category: Optional[str] = None, hidden: Optional[bool] = False): - + def add_calculated_table( + self, + name: str, + expression: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): """ Adds a calculated table to the semantic model. @@ -2238,7 +2649,7 @@ def add_calculated_table(self, name: str, expression: str, description: Optional The data category for the table. hidden : bool, default=False Whether the table is hidden or visible. - + Returns ------- @@ -2262,7 +2673,6 @@ def add_calculated_table(self, name: str, expression: str, description: Optional self.model.Tables.Add(t) def add_field_parameter(self, table_name: str, objects: List[str]): - """ Adds a table to the semantic model. @@ -2271,10 +2681,10 @@ def add_field_parameter(self, table_name: str, objects: List[str]): table_name : str Name of the table. objects : List[str] - The columns/measures to be included in the field parameter. + The columns/measures to be included in the field parameter. Columns must be specified as such : 'Table Name'[Column Name]. Measures may be formatted as '[Measure Name]' or 'Measure Name'. 
- + Returns ------- @@ -2284,44 +2694,88 @@ def add_field_parameter(self, table_name: str, objects: List[str]): print(f"The 'objects' parameter must be a list of columns/measures.") return if len(objects) == 1: - print(f"There must be more than one object (column/measure) within the objects parameter.") + print( + f"There must be more than one object (column/measure) within the objects parameter." + ) return - - expr = '' - i=0 + + expr = "" + i = 0 for obj in objects: success = False for m in self.all_measures(): - if obj == '[' + m.Name + ']' or obj == m.Name: - expr = expr + '\n\t' + '("' + m.Name + '", NAMEOF([' + m.Name + ']), ' + str(i) + '),' + if obj == "[" + m.Name + "]" or obj == m.Name: + expr = ( + expr + + "\n\t" + + '("' + + m.Name + + '", NAMEOF([' + + m.Name + + "]), " + + str(i) + + ")," + ) success = True for c in self.all_columns(): fullObjName = format_dax_object_name(c.Parent.Name, c.Name) - if obj == fullObjName or obj == c.Parent.Name + '[' + c.Name + ']': - expr = expr + '\n\t' + '("' + c.Name + '", NAMEOF(' + fullObjName + '), ' + str(i) + '),' + if obj == fullObjName or obj == c.Parent.Name + "[" + c.Name + "]": + expr = ( + expr + + "\n\t" + + '("' + + c.Name + + '", NAMEOF(' + + fullObjName + + "), " + + str(i) + + ")," + ) success = True if not success: - print(f"The '{obj}' object was not found in the '{dataset}' semantic model.") + print( + f"The '{obj}' object was not found in the '{dataset}' semantic model." + ) return else: - i+=1 + i += 1 - expr = '{' + expr.rstrip(',') + '\n}' + expr = "{" + expr.rstrip(",") + "\n}" - self.add_calculated_table(name = table_name, expression = expr) + self.add_calculated_table(name=table_name, expression=expr) - col2 = table_name + ' Fields' - col3 = table_name + ' Order' + col2 = table_name + " Fields" + col3 = table_name + " Order" - self.add_calculated_table_column(table_name = table_name, column_name = table_name, source_column = '[Value1]', data_type = 'String', hidden = False ) - self.add_calculated_table_column(table_name = table_name, column_name = col2, source_column = '[Value2]', data_type = 'String', hidden = True ) - self.add_calculated_table_column(table_name = table_name, column_name = col3, source_column = '[Value3]', data_type = 'Int64', hidden = True ) + self.add_calculated_table_column( + table_name=table_name, + column_name=table_name, + source_column="[Value1]", + data_type="String", + hidden=False, + ) + self.add_calculated_table_column( + table_name=table_name, + column_name=col2, + source_column="[Value2]", + data_type="String", + hidden=True, + ) + self.add_calculated_table_column( + table_name=table_name, + column_name=col3, + source_column="[Value3]", + data_type="Int64", + hidden=True, + ) - self.set_extended_property(self = self, - object = self.model.Tables[table_name].Columns[col2], - extended_property_type = 'Json', - name = 'ParameterMetadata', - value = '{"version":3,"kind":2}') + self.set_extended_property( + self=self, + object=self.model.Tables[table_name].Columns[col2], + extended_property_type="Json", + name="ParameterMetadata", + value='{"version":3,"kind":2}', + ) rcd = TOM.RelatedColumnDetails() gpc = TOM.GroupByColumn() @@ -2329,19 +2783,20 @@ def add_field_parameter(self, table_name: str, objects: List[str]): rcd.GroupByColumns.Add(gpc) # Update column properties - self.model.Tables[table_name].Columns[col2].SortByColumn = self.model.Tables[table_name].Columns[col3] + self.model.Tables[table_name].Columns[col2].SortByColumn = ( + self.model.Tables[table_name].Columns[col3] + ) 
self.model.Tables[table_name].Columns[table_name].RelatedColumnDetails = rcd fpAdded.append(table_name) def remove_vertipaq_annotations(self): - """ Removes the annotations set using the [set_vertipaq_annotations] function. Parameters ---------- - + Returns ------- @@ -2349,92 +2804,138 @@ def remove_vertipaq_annotations(self): for t in self.model.Tables: for a in t.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = t, name = a.Name) + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=t, name=a.Name) for c in t.Columns: for a in c.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = c, name = a.Name) + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=c, name=a.Name) for h in t.Hierarchies: for a in h.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = h, name = a.Name) + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=h, name=a.Name) for p in t.Partitions: for a in p.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = p, name = a.Name) + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=p, name=a.Name) for r in self.model.Relationships: for a in r.Annotations: - if a.Name.startswith('Veripaq_'): - self.remove_annotation(object = r, name = a.Name) + if a.Name.startswith("Veripaq_"): + self.remove_annotation(object=r, name=a.Name) def set_vertipaq_annotations(self): - """ Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model. Parameters ---------- - + Returns ------- """ - dfT = fabric.list_tables(dataset = dataset, workspace = workspace, extended=True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended=True) - #intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - dfCSum = dfC.groupby(['Table Name'])['Total Size'].sum().reset_index() - dfTable = pd.merge(dfT[['Name', 'Type', 'Row Count']], dfCSum[['Table Name', 'Total Size']], left_on = 'Name', right_on = 'Table Name', how = 'inner') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, extended=True) - dfP['Records per Segment'] = round(dfP['Record Count'] / dfP['Segment Count'],2) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace, extended=True) - dfR = list_relationships(dataset = dataset, workspace = workspace, extended=True) + dfT = fabric.list_tables( + dataset=dataset, workspace=workspace, extended=True + ) + dfC = fabric.list_columns( + dataset=dataset, workspace=workspace, extended=True + ) + # intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] + dfCSum = dfC.groupby(["Table Name"])["Total Size"].sum().reset_index() + dfTable = pd.merge( + dfT[["Name", "Type", "Row Count"]], + dfCSum[["Table Name", "Total Size"]], + left_on="Name", + right_on="Table Name", + how="inner", + ) + dfP = fabric.list_partitions( + dataset=dataset, workspace=workspace, extended=True + ) + dfP["Records per Segment"] = round( + dfP["Record Count"] / dfP["Segment Count"], 2 + ) + dfH = fabric.list_hierarchies( + dataset=dataset, workspace=workspace, extended=True + ) + dfR = list_relationships( + dataset=dataset, workspace=workspace, extended=True + ) for t in self.model.Tables: - dfT_filt = dfTable[dfTable['Name'] == t.Name] - rowCount = str(dfT_filt['Row Count'].iloc[0]) - totalSize = str(dfT_filt['Total Size'].iloc[0]) - self.set_annotation(object = t, name = 'Vertipaq_RowCount', value = rowCount) - 
self.set_annotation(object = t, name = 'Vertipaq_TableSize', value = totalSize) + dfT_filt = dfTable[dfTable["Name"] == t.Name] + rowCount = str(dfT_filt["Row Count"].iloc[0]) + totalSize = str(dfT_filt["Total Size"].iloc[0]) + self.set_annotation(object=t, name="Vertipaq_RowCount", value=rowCount) + self.set_annotation( + object=t, name="Vertipaq_TableSize", value=totalSize + ) for c in t.Columns: - dfC_filt = dfC[(dfC['Table Name'] == t.Name) & (dfC['Column Name'] == c.Name)] - totalSize = str(dfC_filt['Total Size'].iloc[0]) - dataSize = str(dfC_filt['Data Size'].iloc[0]) - dictSize = str(dfC_filt['Dictionary Size'].iloc[0]) - hierSize = str(dfC_filt['Hierarchy Size'].iloc[0]) - card = str(dfC_filt['Column Cardinality'].iloc[0]) - self.set_annotation(object = c, name = 'Vertipaq_TotalSize', value = totalSize) - self.set_annotation(object = c, name = 'Vertipaq_DataSize', value = dataSize) - self.set_annotation(object = c, name = 'Vertipaq_DictionarySize', value = dictSize) - self.set_annotation(object = c, name = 'Vertipaq_HierarchySize', value = hierSize) - self.set_annotation(object = c, name = 'Vertipaq_Cardinality', value = card) + dfC_filt = dfC[ + (dfC["Table Name"] == t.Name) & (dfC["Column Name"] == c.Name) + ] + totalSize = str(dfC_filt["Total Size"].iloc[0]) + dataSize = str(dfC_filt["Data Size"].iloc[0]) + dictSize = str(dfC_filt["Dictionary Size"].iloc[0]) + hierSize = str(dfC_filt["Hierarchy Size"].iloc[0]) + card = str(dfC_filt["Column Cardinality"].iloc[0]) + self.set_annotation( + object=c, name="Vertipaq_TotalSize", value=totalSize + ) + self.set_annotation( + object=c, name="Vertipaq_DataSize", value=dataSize + ) + self.set_annotation( + object=c, name="Vertipaq_DictionarySize", value=dictSize + ) + self.set_annotation( + object=c, name="Vertipaq_HierarchySize", value=hierSize + ) + self.set_annotation( + object=c, name="Vertipaq_Cardinality", value=card + ) for p in t.Partitions: - dfP_filt = dfP[(dfP['Table Name'] == t.Name) & (dfP['Partition Name'] == p.Name)] - recordCount = str(dfP_filt['Record Count'].iloc[0]) - segmentCount = str(dfP_filt['Segment Count'].iloc[0]) - rpS = str(dfP_filt['Records per Segment'].iloc[0]) - self.set_annotation(object = p, name = 'Vertipaq_RecordCount', value = recordCount) - self.set_annotation(object = p, name = 'Vertipaq_SegmentCount', value = segmentCount) - self.set_annotation(object = p, name = 'Vertipaq_RecordsPerSegment', value = rpS) + dfP_filt = dfP[ + (dfP["Table Name"] == t.Name) + & (dfP["Partition Name"] == p.Name) + ] + recordCount = str(dfP_filt["Record Count"].iloc[0]) + segmentCount = str(dfP_filt["Segment Count"].iloc[0]) + rpS = str(dfP_filt["Records per Segment"].iloc[0]) + self.set_annotation( + object=p, name="Vertipaq_RecordCount", value=recordCount + ) + self.set_annotation( + object=p, name="Vertipaq_SegmentCount", value=segmentCount + ) + self.set_annotation( + object=p, name="Vertipaq_RecordsPerSegment", value=rpS + ) for h in t.Hierarchies: - dfH_filt = dfH[(dfH['Table Name'] == t.Name) & (dfH['Hierarchy Name'] == h.Name)] - usedSize = str(dfH_filt['Used Size'].iloc[0]) - self.set_annotation(object = h, name = 'Vertipaq_UsedSize', value = usedSize) + dfH_filt = dfH[ + (dfH["Table Name"] == t.Name) + & (dfH["Hierarchy Name"] == h.Name) + ] + usedSize = str(dfH_filt["Used Size"].iloc[0]) + self.set_annotation( + object=h, name="Vertipaq_UsedSize", value=usedSize + ) for r in self.model.Relationships: - dfR_filt = dfR[dfR['Relationship Name'] == r.Name] - relSize = str(dfR_filt['Used Size'].iloc[0]) - 
self.set_annotation(object = r, name = 'Vertipaq_UsedSize', value = relSize) + dfR_filt = dfR[dfR["Relationship Name"] == r.Name] + relSize = str(dfR_filt["Used Size"].iloc[0]) + self.set_annotation(object=r, name="Vertipaq_UsedSize", value=relSize) try: - runId = self.get_annotation_value(object = self.model, name = 'Vertipaq_Run') + runId = self.get_annotation_value( + object=self.model, name="Vertipaq_Run" + ) runId = str(int(runId) + 1) except: - runId = '1' - self.set_annotation(object = self.model, name = 'Vertipaq_Run', value = runId) - - def row_count(self, object: Union['TOM.Partition', 'TOM.Table']): + runId = "1" + self.set_annotation(object=self.model, name="Vertipaq_Run", value=runId) + def row_count(self, object: Union["TOM.Partition", "TOM.Table"]): """ Obtains the row count of a table or partition within a semantic model. @@ -2442,24 +2943,27 @@ def row_count(self, object: Union['TOM.Partition', 'TOM.Table']): ---------- object : TOM Object The table/partition object within the semantic model. - + Returns ------- int Number of rows within the TOM object. """ - + objType = object.ObjectType - + if objType == TOM.ObjectType.Table: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RowCount') + result = self.get_annotation_value( + object=object, name="Vertipaq_RowCount" + ) elif objType == TOM.ObjectType.Partition: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RecordCount') + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordCount" + ) return int(result) - - def records_per_segment(self, object: 'TOM.Partition'): + def records_per_segment(self, object: "TOM.Partition"): """ Obtains the records per segment of a partition within a semantic model. @@ -2467,22 +2971,23 @@ def records_per_segment(self, object: 'TOM.Partition'): ---------- object : TOM Object The partition object within the semantic model. - + Returns ------- float Number of records per segment within the partition. """ - + objType = object.ObjectType - + if objType == TOM.ObjectType.Partition: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RecordsPerSegment') + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordsPerSegment" + ) return float(result) - - def used_size(self, object: Union['TOM.Hierarchy', 'TOM.Relationship']): + def used_size(self, object: Union["TOM.Hierarchy", "TOM.Relationship"]): """ Obtains the used size of a hierarchy or relationship within a semantic model. @@ -2490,24 +2995,27 @@ def used_size(self, object: Union['TOM.Hierarchy', 'TOM.Relationship']): ---------- object : TOM Object The hierarhcy/relationship object within the semantic model. - + Returns ------- int Used size of the TOM object. """ - + objType = object.ObjectType - + if objType == TOM.ObjectType.Hierarchy: - result = self.get_annotation_value(object = object, name = 'Vertipaq_UsedSize') + result = self.get_annotation_value( + object=object, name="Vertipaq_UsedSize" + ) elif objType == TOM.ObjectType.Relationship: - result = self.get_annotation_value(object = object, name = 'Vertipaq_UsedSize') + result = self.get_annotation_value( + object=object, name="Vertipaq_UsedSize" + ) return int(result) - def data_size(self, column: 'TOM.Column'): - + def data_size(self, column: "TOM.Column"): """ Obtains the data size of a column within a semantic model. @@ -2515,22 +3023,23 @@ def data_size(self, column: 'TOM.Column'): ---------- column : TOM Object The column object within the semantic model. 
- + Returns ------- int Data size of the TOM column. """ - + objType = column.ObjectType - + if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_DataSize') + result = self.get_annotation_value( + object=column, name="Vertipaq_DataSize" + ) return int(result) - def dictionary_size(self, column: 'TOM.Column'): - + def dictionary_size(self, column: "TOM.Column"): """ Obtains the dictionary size of a column within a semantic model. @@ -2538,7 +3047,7 @@ def dictionary_size(self, column: 'TOM.Column'): ---------- column : TOM Object The column object within the semantic model. - + Returns ------- int @@ -2548,12 +3057,13 @@ def dictionary_size(self, column: 'TOM.Column'): objType = column.ObjectType if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_DictionarySize') + result = self.get_annotation_value( + object=column, name="Vertipaq_DictionarySize" + ) return int(result) - - def total_size(self, object: Union['TOM.Table', 'TOM.Column']): + def total_size(self, object: Union["TOM.Table", "TOM.Column"]): """ Obtains the data size of a table/column within a semantic model. @@ -2561,7 +3071,7 @@ def total_size(self, object: Union['TOM.Table', 'TOM.Column']): ---------- object : TOM Object The table/column object within the semantic model. - + Returns ------- int @@ -2569,16 +3079,19 @@ def total_size(self, object: Union['TOM.Table', 'TOM.Column']): """ objType = object.ObjectType - + if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = object, name = 'Vertipaq_TotalSize') + result = self.get_annotation_value( + object=object, name="Vertipaq_TotalSize" + ) elif objType == TOM.ObjectType.Table: - result = self.get_annotation_value(object = object, name = 'Vertipaq_TotalSize') + result = self.get_annotation_value( + object=object, name="Vertipaq_TotalSize" + ) return int(result) - def cardinality(self, column: 'TOM.Column'): - + def cardinality(self, column: "TOM.Column"): """ Obtains the cardinality of a column within a semantic model. @@ -2586,22 +3099,23 @@ def cardinality(self, column: 'TOM.Column'): ---------- column : TOM Object The column object within the semantic model. - + Returns ------- int Cardinality of the TOM column. """ - + objType = column.ObjectType - + if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_Cardinality') + result = self.get_annotation_value( + object=column, name="Vertipaq_Cardinality" + ) - return int(result) - - def depends_on(self, object, dependencies: pd.DataFrame): + return int(result) + def depends_on(self, object, dependencies: pd.DataFrame): """ Obtains the objects on which the specified object depends. @@ -2611,7 +3125,7 @@ def depends_on(self, object, dependencies: pd.DataFrame): The TOM object within the semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
- + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection @@ -2625,10 +3139,28 @@ def depends_on(self, object, dependencies: pd.DataFrame): if objType == TOM.ObjectType.Table: objParentName = objName - fil = dependencies[(dependencies['Object Type'] == objType) & (dependencies['Table Name'] == objParentName) & (dependencies['Object Name'] == objName)] - meas = fil[fil['Referenced Object Type'] == 'Measure']['Referenced Object'].unique().tolist() - cols = fil[fil['Referenced Object Type'] == 'Column']['Referenced Full Object Name'].unique().tolist() - tbls = fil[fil['Referenced Object Type'] == 'Table']['Referenced Table'].unique().tolist() + fil = dependencies[ + (dependencies["Object Type"] == objType) + & (dependencies["Table Name"] == objParentName) + & (dependencies["Object Name"] == objName) + ] + meas = ( + fil[fil["Referenced Object Type"] == "Measure"]["Referenced Object"] + .unique() + .tolist() + ) + cols = ( + fil[fil["Referenced Object Type"] == "Column"][ + "Referenced Full Object Name" + ] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Referenced Object Type"] == "Table"]["Referenced Table"] + .unique() + .tolist() + ) for m in self.all_measures(): if m.Name in meas: yield m @@ -2640,7 +3172,6 @@ def depends_on(self, object, dependencies: pd.DataFrame): yield t def referenced_by(self, object, dependencies: pd.DataFrame): - """ Obtains the objects which reference the specified object. @@ -2650,7 +3181,7 @@ def referenced_by(self, object, dependencies: pd.DataFrame): The TOM object within the semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - + Returns ------- Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection @@ -2664,10 +3195,24 @@ def referenced_by(self, object, dependencies: pd.DataFrame): if objType == TOM.ObjectType.Table: objParentName = objName - fil = dependencies[(dependencies['Referenced Object Type'] == objType) & (dependencies['Referenced Table'] == objParentName) & (dependencies['Referenced Object'] == objName)] - meas = fil[fil['Object Type'] == 'Measure']['Object Name'].unique().tolist() - cols = fil[fil['Object Type'].isin(['Column', 'Calc Column'])]['Full Object Name'].unique().tolist() - tbls = fil[fil['Object Type'].isin(['Table', 'Calc Table'])]['Table Name'].unique().tolist() + fil = dependencies[ + (dependencies["Referenced Object Type"] == objType) + & (dependencies["Referenced Table"] == objParentName) + & (dependencies["Referenced Object"] == objName) + ] + meas = fil[fil["Object Type"] == "Measure"]["Object Name"].unique().tolist() + cols = ( + fil[fil["Object Type"].isin(["Column", "Calc Column"])][ + "Full Object Name" + ] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Object Type"].isin(["Table", "Calc Table"])]["Table Name"] + .unique() + .tolist() + ) for m in self.all_measures(): if m.Name in meas: yield m @@ -2678,8 +3223,9 @@ def referenced_by(self, object, dependencies: pd.DataFrame): if t.Name in tbls: yield t - def fully_qualified_measures(self, object: 'TOM.Measure', dependencies: pd.DataFrame): - + def fully_qualified_measures( + self, object: "TOM.Measure", dependencies: pd.DataFrame + ): """ Obtains all fully qualified measure references for a given object. 
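# Usage sketch (illustrative only) for the Vertipaq annotation readers (row_count, total_size,
# cardinality, data_size) and the dependency walkers (depends_on / referenced_by) shown above.
# It assumes set_vertipaq_annotations() has already been run so the Vertipaq_* annotations exist;
# the connect-style context manager, the import paths, and all dataset/workspace/table/column/
# measure names are placeholders, not taken from this patch.
from sempy_labs import get_model_calc_dependencies   # import path assumed
from sempy_labs.tom import connect_semantic_model    # import path assumed

dependencies = get_model_calc_dependencies(dataset="AdventureWorks", workspace="My Workspace")

with connect_semantic_model(dataset="AdventureWorks", workspace="My Workspace") as tom:
    sales = tom.model.Tables["Sales"]
    region = sales.Columns["Region"]

    # Vertipaq statistics stored as annotations by set_vertipaq_annotations().
    print(tom.row_count(object=sales), tom.total_size(object=sales))
    print(tom.cardinality(column=region), tom.data_size(column=region))

    # Objects the 'Total Sales' measure depends on, and objects that reference it.
    measure = sales.Measures["Total Sales"]
    for obj in tom.depends_on(object=measure, dependencies=dependencies):
        print(obj.ObjectType, obj.Name)
    for obj in tom.referenced_by(object=measure, dependencies=dependencies):
        print(obj.ObjectType, obj.Name)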
@@ -2689,20 +3235,22 @@ def fully_qualified_measures(self, object: 'TOM.Measure', dependencies: pd.DataF The TOM object within the semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - + Returns ------- Microsoft.AnalysisServices.Tabular.MeasureCollection All fully qualified measure references for a given object. """ - - for obj in self.depends_on(object = object, dependencies=dependencies): + + for obj in self.depends_on(object=object, dependencies=dependencies): if obj.ObjectType == TOM.ObjectType.Measure: - if (obj.Parent.Name + obj.Name in object.Expression) or (format_dax_object_name(obj.Parent.Name, obj.Name) in object.Expression): + if (obj.Parent.Name + obj.Name in object.Expression) or ( + format_dax_object_name(obj.Parent.Name, obj.Name) + in object.Expression + ): yield obj - def unqualified_columns(self, object: 'TOM.Column', dependencies: pd.DataFrame): - + def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): """ Obtains all unqualified column references for a given object. @@ -2712,29 +3260,33 @@ def unqualified_columns(self, object: 'TOM.Column', dependencies: pd.DataFrame): The TOM object within the semantic model. dependencies : pandas.DataFrame A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - + Returns ------- Microsoft.AnalysisServices.Tabular.ColumnCollection All unqualified column references for a given object. """ - + def create_pattern(a, b): - return r'(? 0: usingView = True - + return usingView - - def has_incremental_refresh_policy(self, table_name: str): + def has_incremental_refresh_policy(self, table_name: str): """ Identifies whether a table has an incremental refresh policy. @@ -2761,7 +3312,7 @@ def has_incremental_refresh_policy(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- bool @@ -2775,9 +3326,8 @@ def has_incremental_refresh_policy(self, table_name: str): hasRP = True return hasRP - - def show_incremental_refresh_policy(self, table_name: str): + def show_incremental_refresh_policy(self, table_name: str): """ Prints the incremental refresh policy for a table. @@ -2785,7 +3335,7 @@ def show_incremental_refresh_policy(self, table_name: str): ---------- table_name : str Name of the table. - + Returns ------- @@ -2794,40 +3344,64 @@ def show_incremental_refresh_policy(self, table_name: str): rp = self.model.Tables[table_name].RefreshPolicy if rp is None: - print(f"The '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace does not have an incremental refresh policy.") - else: + print( + f"The '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace does not have an incremental refresh policy." + ) + else: print(f"Table Name: {table_name}") rwGran = str(rp.RollingWindowGranularity).lower() icGran = str(rp.IncrementalGranularity).lower() - if rp.RollingWindowPeriods > 1: - print(f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date.") + if rp.RollingWindowPeriods > 1: + print( + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date." 
+ ) else: - print(f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date.") - if rp.IncrementalPeriods > 1: - print(f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date.") + print( + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date." + ) + if rp.IncrementalPeriods > 1: + print( + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date." + ) else: - print(f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date.") + print( + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date." + ) if rp.Mode == TOM.RefreshPolicyMode.Hybrid: - print(f"{checked} Get the latest data in real time with DirectQuery (Premium only)") + print( + f"{checked} Get the latest data in real time with DirectQuery (Premium only)" + ) else: - print(f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)") + print( + f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)" + ) if rp.IncrementalPeriodsOffset == -1: print(f"{checked} Only refresh complete days") else: print(f"{unchecked} Only refresh complete days") if len(rp.PollingExpression) > 0: - pattern = r'\[([^\]]+)\]' + pattern = r"\[([^\]]+)\]" match = re.search(pattern, rp.PollingExpression) if match: col = match[0][1:-1] fullCol = format_dax_object_name(table_name, col) - print(f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}") + print( + f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}" + ) else: print(f"{unchecked} Detect data changes") - def update_incremental_refresh_policy(self, table_name: str, incremental_granularity: str, incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, only_refresh_complete_days: Optional[bool] = False, detect_data_changes_column: Optional[str] = None): - + def update_incremental_refresh_policy( + self, + table_name: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): """ Updates the incremental refresh policy for a table within a semantic model. @@ -2854,27 +3428,37 @@ def update_incremental_refresh_policy(self, table_name: str, incremental_granula """ - if not self.has_incremental_refresh_policy(table_name = table_name): - print(f"The '{table_name}' table does not have an incremental refresh policy.") + if not self.has_incremental_refresh_policy(table_name=table_name): + print( + f"The '{table_name}' table does not have an incremental refresh policy." + ) return - - incGran = ['Day', 'Month', 'Quarter', 'Year'] + + incGran = ["Day", "Month", "Quarter", "Year"] incremental_granularity = incremental_granularity.capitalize() rolling_window_granularity = rolling_window_granularity.capitalize() if incremental_granularity not in incGran: - print(f"{red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}.") + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + ) return if rolling_window_granularity not in incGran: - print(f"{red_dot} Invalid 'rolling_window_granularity' value. 
Please choose from the following options: {incGran}.") + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + ) return - + if rolling_window_periods < 1: - print(f"{red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0.") + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + ) return if incremental_periods < 1: - print(f"{red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0.") + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." + ) return t = self.model.Tables[table_name] @@ -2883,14 +3467,20 @@ def update_incremental_refresh_policy(self, table_name: str, incremental_granula dc = t.Columns[detect_data_changes_column] if dc.DataType != TOM.DataType.DateTime: - print(f"{red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type.") + print( + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." + ) return rp = TOM.BasicRefreshPolicy() rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse(TOM.RefreshGranularityType, incremental_granularity) + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse(TOM.RefreshGranularityType, rolling_window_granularity) + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) rp.SourceExpression = t.RefreshPolicy.SourceExpression if only_refresh_complete_days: @@ -2909,8 +3499,19 @@ def update_incremental_refresh_policy(self, table_name: str, incremental_granula self.show_incremental_refresh_policy(table_name=table_name) - def add_incremental_refresh_policy(self, table_name: str, column_name: str, start_date: str, end_date: str, incremental_granularity: str, incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, only_refresh_complete_days: Optional[bool] = False, detect_data_changes_column: Optional[str] = None): - + def add_incremental_refresh_policy( + self, + table_name: str, + column_name: str, + start_date: str, + end_date: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): """ Adds anincremental refresh policy for a table within a semantic model. @@ -2943,28 +3544,36 @@ def add_incremental_refresh_policy(self, table_name: str, column_name: str, star """ - #https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure + # https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure - incGran = ['Day', 'Month', 'Quarter', 'Year'] + incGran = ["Day", "Month", "Quarter", "Year"] incremental_granularity = incremental_granularity.capitalize() rolling_window_granularity = rolling_window_granularity.capitalize() if incremental_granularity not in incGran: - print(f"{red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}.") + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." 
+ ) return if rolling_window_granularity not in incGran: - print(f"{red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}.") + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + ) return - + if rolling_window_periods < 1: - print(f"{red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0.") + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + ) return if incremental_periods < 1: - print(f"{red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0.") + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." + ) return - - date_format = '%m/%d/%Y' + + date_format = "%m/%d/%Y" date_obj_start = datetime.strptime(start_date, date_format) start_year = date_obj_start.year @@ -2977,7 +3586,9 @@ def add_incremental_refresh_policy(self, table_name: str, column_name: str, star end_day = date_obj_end.day if date_obj_end <= date_obj_start: - print(f"{red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'.") + print( + f"{icons.red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'." + ) return t = self.model.Tables[table_name] @@ -2987,59 +3598,75 @@ def add_incremental_refresh_policy(self, table_name: str, column_name: str, star dType = c.DataType if dType != TOM.DataType.DateTime: - print(f"{red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type.") + print( + f"{icons.red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type." + ) return - + if detect_data_changes_column is not None: dc = t.Columns[detect_data_changes_column] dcType = dc.DataType if dcType != TOM.DataType.DateTime: - print(f"{red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type.") + print( + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." + ) return # Start changes: # Update partition expression - i=0 + i = 0 for p in t.Partitions: if p.SourceType != TOM.PartitionSourceType.M: - print(f"{red_dot} Invalid partition source type. Incremental refresh can only be set up if the table's partition is an M-partition.") + print( + f"{icons.red_dot} Invalid partition source type. Incremental refresh can only be set up if the table's partition is an M-partition." 
+ ) return - elif i==0: + elif i == 0: text = p.Expression text = text.rstrip() - ind = text.rfind(' ') + 1 + ind = text.rfind(" ") + 1 obj = text[ind:] pattern = r"in\s*[^ ]*" matches = list(re.finditer(pattern, text)) if matches: last_match = matches[-1] - text_before_last_match = text[:last_match.start()] + text_before_last_match = text[: last_match.start()] print(text_before_last_match) else: - print(f"{red_dot} Invalid M-partition expression.") + print(f"{icons.red_dot} Invalid M-partition expression.") return - + endExpr = f'#"Filtered Rows IR" = Table.SelectRows({obj}, each [{column_name}] >= RangeStart and [{column_name}] <= RangeEnd)\n#"Filtered Rows IR"' finalExpr = text_before_last_match + endExpr p.Expression = finalExpr - i+=1 + i += 1 # Add expressions - self.add_expression(name = 'RangeStart', expression = f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]') - self.add_expression(name = 'RangeEnd', expression = f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]') + self.add_expression( + name="RangeStart", + expression=f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) + self.add_expression( + name="RangeEnd", + expression=f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) # Update properties rp = TOM.BasicRefreshPolicy() rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse(TOM.RefreshGranularityType, incremental_granularity) + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse(TOM.RefreshGranularityType, rolling_window_granularity) + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) if only_refresh_complete_days: rp.IncrementalPeriodsOffset = -1 @@ -3055,8 +3682,13 @@ def add_incremental_refresh_policy(self, table_name: str, column_name: str, star self.show_incremental_refresh_policy(table_name=table_name) - def apply_refresh_policy(self, table_name: str, effective_date: Optional[datetime] = None, refresh: Optional[bool] = True, max_parallelism: Optional[int] = 0): - + def apply_refresh_policy( + self, + table_name: str, + effective_date: Optional[datetime] = None, + refresh: Optional[bool] = True, + max_parallelism: Optional[int] = 0, + ): """ Applies the incremental refresh policy for a table within a semantic model. @@ -3076,10 +3708,15 @@ def apply_refresh_policy(self, table_name: str, effective_date: Optional[datetim """ - self.model.Tables[table_name].ApplyRefreshPolicy(effectiveDate = effective_date, refresh = refresh, maxParallelism = max_parallelism) - - def set_data_coverage_definition(self, table_name: str, partition_name: str, expression: str): + self.model.Tables[table_name].ApplyRefreshPolicy( + effectiveDate=effective_date, + refresh=refresh, + maxParallelism=max_parallelism, + ) + def set_data_coverage_definition( + self, table_name: str, partition_name: str, expression: str + ): """ Sets the data coverage definition for a partition. 
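# A minimal usage sketch of the incremental-refresh helpers reformatted above, based only on
# the signatures visible in this diff. The import path, the dataset/workspace names and the
# table/column names are assumptions for illustration, not part of the patch.
from datetime import datetime

from sempy_labs.TOM import connect_semantic_model  # assumed module path

with connect_semantic_model(
    dataset="AdventureWorks", readonly=False, workspace="Sales WS"
) as tom:
    # Dates must match the '%m/%d/%Y' format parsed by add_incremental_refresh_policy,
    # and the partition column must be of DateTime data type.
    tom.add_incremental_refresh_policy(
        table_name="FactInternetSales",
        column_name="OrderDate",
        start_date="1/1/2020",
        end_date="12/31/2023",
        incremental_granularity="Month",   # one of 'Day', 'Month', 'Quarter', 'Year'
        incremental_periods=12,
        rolling_window_granularity="Year",
        rolling_window_periods=5,
        only_refresh_complete_days=True,
    )
    # Optionally apply the policy right away.
    tom.apply_refresh_policy(
        table_name="FactInternetSales", effective_date=datetime(2024, 6, 1)
    )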
@@ -3097,18 +3734,22 @@ def set_data_coverage_definition(self, table_name: str, partition_name: str, exp """ - doc = 'https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions' + doc = "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions" t = self.model.Tables[table_name] p = t.Partitions[partition_name] - ht = self.is_hybrid_table(table_name = table_name) + ht = self.is_hybrid_table(table_name=table_name) if not ht: - print(f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}.") + print( + f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}." + ) return if p.Mode != TOM.ModeType.DirectQuery: - print(f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}.") + print( + f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}." + ) return dcd = TOM.DataCoverageDefinition() @@ -3116,7 +3757,6 @@ def set_data_coverage_definition(self, table_name: str, partition_name: str, exp p.DataCoverageDefinition = dcd def set_encoding_hint(self, table_name: str, column_name: str, value: str): - """ Sets the encoding hint for a column. @@ -3134,17 +3774,20 @@ def set_encoding_hint(self, table_name: str, column_name: str, value: str): """ - values = ['Default', 'Hash', 'Value'] + values = ["Default", "Hash", "Value"] value = value.capitalize() if value not in values: - print(f"{red_dot} Invalid encoding hint value. Please choose from these options: {values}.") + print( + f"{icons.red_dot} Invalid encoding hint value. Please choose from these options: {values}." + ) return - self.model.Tables[table_name].Columns[column_name].EncodingHint = System.Enum.Parse(TOM.EncodingHintType, value) + self.model.Tables[table_name].Columns[column_name].EncodingHint = ( + System.Enum.Parse(TOM.EncodingHintType, value) + ) def set_data_type(self, table_name: str, column_name: str, value: str): - """ Sets the data type for a column. @@ -3162,26 +3805,39 @@ def set_data_type(self, table_name: str, column_name: str, value: str): """ - values = ['Binary', 'Boolean', 'DateTime', 'Decimal', 'Double', 'Int64', 'String'] + values = [ + "Binary", + "Boolean", + "DateTime", + "Decimal", + "Double", + "Int64", + "String", + ] + + value = value.replace(" ", "").capitalize() + if value == "Datetime": + value = "DateTime" + elif value.startswith("Int"): + value = "Int64" + elif value.startswith("Bool"): + value = "Boolean" - value = value.replace(' ','').capitalize() - if value == 'Datetime': - value = 'DateTime' - elif value.startswith('Int'): - value = 'Int64' - elif value.startswith('Bool'): - value = 'Boolean' - if value not in values: - print(f"{red_dot} Invalid data type. Please choose from these options: {values}.") + print( + f"{icons.red_dot} Invalid data type. Please choose from these options: {values}." 
+ ) return - - self.model.Tables[table_name].Columns[column_name].DataType = System.Enum.Parse(TOM.DataType, value) - def add_time_intelligence(self, measure_name: str, date_table: str, time_intel: Union[str, List[str]]): + self.model.Tables[table_name].Columns[column_name].DataType = ( + System.Enum.Parse(TOM.DataType, value) + ) + def add_time_intelligence( + self, measure_name: str, date_table: str, time_intel: Union[str, List[str]] + ): """ - Adds time intelligence measures + Adds time intelligence measures Parameters ---------- @@ -3198,16 +3854,18 @@ def add_time_intelligence(self, measure_name: str, date_table: str, time_intel: """ table_name = None - time_intel_options = ['MTD', 'QTD', 'YTD'] + time_intel_options = ["MTD", "QTD", "YTD"] if isinstance(time_intel, str): time_intel = [time_intel] - + # Validate time intelligence variations for t in time_intel: t = t.capitalize() if t not in [time_intel_options]: - print(f"The '{t}' time intelligence variation is not supported. Valid options: {time_intel_options}.") + print( + f"The '{t}' time intelligence variation is not supported. Valid options: {time_intel_options}." + ) return # Validate measure and extract table name @@ -3216,14 +3874,18 @@ def add_time_intelligence(self, measure_name: str, date_table: str, time_intel: table_name = m.Parent.Name if table_name is None: - print(f"The '{measure_name}' is not a valid measure in the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The '{measure_name}' is not a valid measure in the '{dataset}' semantic model within the '{workspace}' workspace." + ) return - + # Validate date table if not self.is_date_table(date_table): - print(f"{red_dot} The '{date_table}' table is not a valid date table in the '{dataset}' wemantic model within the '{workspace}' workspace.") + print( + f"{icons.red_dot} The '{date_table}' table is not a valid date table in the '{dataset}' wemantic model within the '{workspace}' workspace." 
+ ) return - + # Extract date key from date table for c in self.all_columns(): if c.Parent.Name == date_table and c.IsKey: @@ -3231,21 +3893,27 @@ def add_time_intelligence(self, measure_name: str, date_table: str, time_intel: # Create the new time intelligence measures for t in time_intel: - if t == 'MTD': + if t == "MTD": expr = f"CALCULATE([{measure_name}],DATES{time_intel}('{date_table}'[{date_key}]))" new_meas_name = f"{measure_name} {t}" - self.add_measure(table_name = table_name, measure_name = new_meas_name, expression = expr) - + self.add_measure( + table_name=table_name, + measure_name=new_meas_name, + expression=expr, + ) + def close(self): if not readonly and self.model is not None: self.model.SaveChanges() if len(fpAdded) > 0: - refresh_semantic_model(dataset = dataset, tables = fpAdded, workspace = workspace) + refresh_semantic_model( + dataset=dataset, tables=fpAdded, workspace=workspace + ) self.model = None - tw = TOMWrapper(dataset = dataset, workspace = workspace, readonly = readonly) - try: - yield tw + tw = TOMWrapper(dataset=dataset, workspace=workspace, readonly=readonly) + try: + yield tw finally: tw.close() diff --git a/sempy_labs/Translations.py b/sempy_labs/Translations.py index 0f389ce3..9dc4ca3b 100644 --- a/sempy_labs/Translations.py +++ b/sempy_labs/Translations.py @@ -1,14 +1,10 @@ import pandas as pd from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' def language_validate(language: str): - """ Validateds that the language specified exists within the supported langauges. @@ -22,29 +18,36 @@ def language_validate(language: str): bool A True/False indication as to whether the language code is supported. """ - - url = 'https://learn.microsoft.com/azure/ai-services/translator/language-support' + + url = "https://learn.microsoft.com/azure/ai-services/translator/language-support" tables = pd.read_html(url) df = tables[0] - df_filt = df[df['Language code'] == language] + df_filt = df[df["Language code"] == language] - df_filt2 = df[df['Language'] == language.capitalize()] + df_filt2 = df[df["Language"] == language.capitalize()] if len(df_filt) == 1: - lang = df_filt['Language'].iloc[0] + lang = df_filt["Language"].iloc[0] elif len(df_filt2) == 1: - lang = df_filt2['Language'].iloc[0] + lang = df_filt2["Language"].iloc[0] else: - print(f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}.") + print( + f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}." + ) return return lang -@log -def translate_semantic_model(dataset: str, languages: Union[str, List[str]], exclude_characters: Optional[str] = None, workspace: Optional[str] = None): +@log +def translate_semantic_model( + dataset: str, + languages: Union[str, List[str]], + exclude_characters: Optional[str] = None, + workspace: Optional[str] = None, +): """ Translates names, descriptions, display folders for all objects in a semantic model. 
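# A minimal call sketch for the function whose signature is reformatted above, using only the
# parameters shown in this diff. The dataset, workspace and language codes are illustrative;
# the import path is an assumption based on this file's location.
from sempy_labs.Translations import translate_semantic_model

translate_semantic_model(
    dataset="AdventureWorks",   # semantic model to translate
    languages=["it", "ja"],     # a single code or a list of codes
    exclude_characters="_-",    # each listed character is replaced with a space before translating
    workspace="Sales WS",       # optional
)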
@@ -63,7 +66,7 @@ def translate_semantic_model(dataset: str, languages: Union[str, List[str]], exc Returns ------- - + """ from synapse.ml.services import Translate @@ -74,67 +77,151 @@ def translate_semantic_model(dataset: str, languages: Union[str, List[str]], exc if isinstance(languages, str): languages = [languages] - dfPrep = pd.DataFrame(columns=['Object Type', 'Name', 'Description', 'Display Folder']) + dfPrep = pd.DataFrame( + columns=["Object Type", "Name", "Description", "Display Folder"] + ) - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: if exclude_characters is None: for o in tom.model.Tables: - new_data = {'Object Type': 'Table', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': None, 'TDisplay Folder': None} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Type": "Table", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": None, + "TDisplay Folder": None, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_columns(): - new_data = {'Object Type': 'Column', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Type": "Column", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_measures(): - new_data = {'Object Type': 'Measure', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Type": "Measure", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_hierarchies(): - new_data = {'Object Type': 'Hierarchy', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Type": "Hierarchy", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) else: for o in tom.model.Tables: oName = o.Name oDescription = o.Description for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - new_data = {'Object Type': 'Table', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': 
None, 'TDisplay Folder': None} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + new_data = { + "Object Type": "Table", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": None, + "TDisplay Folder": None, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_columns(): oName = o.Name oDescription = o.Description oDisplayFolder = o.DisplayFolder for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Column', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Column", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_measures(): oName = o.Name oDescription = o.Description oDisplayFolder = o.DisplayFolder for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Measure', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Measure", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for o in tom.all_hierarchies(): oName = o.Name oDescription = o.Description oDisplayFolder = o.DisplayFolder for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Hierarchy', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Hierarchy", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) spark = SparkSession.builder.getOrCreate() df = spark.createDataFrame(dfPrep) - columns = 
['Name', 'Description', 'Display Folder'] + columns = ["Name", "Description", "Display Folder"] for clm in columns: columnToTranslate = f"T{clm}" @@ -146,83 +233,183 @@ def translate_semantic_model(dataset: str, languages: Union[str, List[str]], exc .setConcurrency(5) ) - transDF = (translate - .transform(df) + transDF = ( + translate.transform(df) .withColumn("translation", flatten(col("translation.translations"))) .withColumn("translation", col("translation.text")) - .select('Object Type', clm, columnToTranslate, 'translation')) + .select("Object Type", clm, columnToTranslate, "translation") + ) df_panda = transDF.toPandas() - print(f"{in_progress} Translating {clm}s...") + print(f"{icons.in_progress} Translating {clm}s...") for lang in languages: i = languages.index(lang) - tom.add_translation(language = lang) - print(f"{in_progress} Translating into the '{lang}' language...") + tom.add_translation(language=lang) + print(f"{icons.in_progress} Translating into the '{lang}' language...") for t in tom.model.Tables: if t.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Table') & (df_panda['Name'] == t.Name)] + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Table") + & (df_panda["Name"] == t.Name) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = t, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set for the '{lang}' language on the '{t.Name}' table.") - elif clm == 'Description' and t.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Table') & (df_panda['Description'] == t.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=t, language=lang, property="Name", value=tr + ) + print( + f"{icons.green_dot} Translation '{tr}' set for the '{lang}' language on the '{t.Name}' table." + ) + elif clm == "Description" and t.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Table") + & (df_panda["Description"] == t.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = t, language = lang, property = 'Description', value = tr) + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=t, + language=lang, + property="Description", + value=tr, + ) for c in t.Columns: if c.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Name'] == c.Name)] + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Name"] == c.Name) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set on the '{c.Name}' column within the {t.Name}' table.") - elif clm == 'Description' and c.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Description'] == c.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Name", + value=tr, + ) + print( + f"{icons.green_dot} Translation '{tr}' set on the '{c.Name}' column within the {t.Name}' table." 
+ ) + elif clm == "Description" and c.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Description"] == c.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and c.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Display Folder'] == c.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" + and c.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Display Folder"] == c.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Display Folder', value = tr) + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Display Folder", + value=tr, + ) for h in t.Hierarchies: if h.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Name'] == h.Name)] + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Name"] == h.Name) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language = lang, property = 'Name', value = tr) - elif clm == 'Description' and h.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Description'] == h.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Name", + value=tr, + ) + elif clm == "Description" and h.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Description"] == h.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language = lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and h.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Display Folder'] == h.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" + and h.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Display Folder"] == h.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language = lang, property = 'Display Folder', value = tr) + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Display Folder", + value=tr, + ) for ms in t.Measures: if ms.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Name'] == ms.Name)] + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Name"] == ms.Name) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set on the '{ms.Name}' column within the {t.Name}' table.") - elif clm == 
'Description' and ms.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Description'] == ms.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and ms.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Display Folder'] == ms.Description)] + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Name", + value=tr, + ) + print( + f"{icons.green_dot} Translation '{tr}' set on the '{ms.Name}' column within the {t.Name}' table." + ) + elif clm == "Description" and ms.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Description"] == ms.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" and ms.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Display Folder"] == ms.Description) + ] if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Display Folder', value = tr) + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Display Folder", + value=tr, + ) diff --git a/sempy_labs/Vertipaq.py b/sempy_labs/Vertipaq.py index ca50a740..f2a132ff 100644 --- a/sempy_labs/Vertipaq.py +++ b/sempy_labs/Vertipaq.py @@ -4,16 +4,26 @@ from IPython.display import display, HTML import zipfile, os, shutil, datetime, warnings from pyspark.sql import SparkSession -from .HelperFunctions import format_dax_object_name, get_direct_lake_sql_endpoint, resolve_lakehouse_name -from .ListFunctions import list_relationships -from .GetLakehouseTables import get_lakehouse_tables -from .Lakehouse import lakehouse_attached +from ._helper_functions import ( + format_dax_object_name, + get_direct_lake_sql_endpoint, + resolve_lakehouse_name, +) +from ._list_functions import list_relationships +from .lakehouse.GetLakehouseTables import get_lakehouse_tables +from .lakehouse.Lakehouse import lakehouse_attached from typing import List, Optional, Union from sempy._utils._log import log + @log -def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Optional[str] = None, lakehouse_workspace: Optional[str] = None, read_stats_from_data: Optional[bool] = False): - +def vertipaq_analyzer( + dataset: str, + workspace: Optional[str] = None, + export: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, + read_stats_from_data: Optional[bool] = False, +): """ Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model. @@ -26,8 +36,8 @@ def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Opt Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. export : str, default=None - Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function. - Specifying 'table' will export the results to delta tables (appended) in your lakehouse. 
+ Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function. + Specifying 'table' will export the results to delta tables (appended) in your lakehouse. Default value: None. lakehouse_workspace : str, default=None The Fabric workspace used by the lakehouse (for Direct Lake semantic models). @@ -42,72 +52,93 @@ def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Opt """ pd.options.mode.copy_on_write = True - warnings.filterwarnings("ignore", message="createDataFrame attempted Arrow optimization*") + warnings.filterwarnings( + "ignore", message="createDataFrame attempted Arrow optimization*" + ) if workspace == None: workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) + workspace = fabric.resolve_workspace_name(workspace_id) if lakehouse_workspace == None: lakehouse_workspace = workspace - dfT = fabric.list_tables(dataset = dataset, extended=True, workspace = workspace) - dfT.rename(columns={'Name': 'Table Name'}, inplace=True) - dfC = fabric.list_columns(dataset = dataset, extended=True, workspace = workspace) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC.rename(columns={'Column Cardinality': 'Cardinality'}, inplace=True) - dfH = fabric.list_hierarchies(dataset = dataset, extended=True, workspace = workspace) - dfR = list_relationships(dataset = dataset, extended=True, workspace = workspace) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfP = fabric.list_partitions(dataset = dataset, extended=True, workspace = workspace) - dfD = fabric.list_datasets(workspace = workspace, additional_xmla_properties=['CompatibilityLevel','Model.DefaultMode']) - dfD = dfD[dfD['Dataset Name'] == dataset] - dfD['Compatibility Level'] = dfD['Compatibility Level'].astype(int) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - dfR['Missing Rows'] = None + dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace) + dfT.rename(columns={"Name": "Table Name"}, inplace=True) + dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True) + dfH = fabric.list_hierarchies(dataset=dataset, extended=True, workspace=workspace) + dfR = list_relationships(dataset=dataset, extended=True, workspace=workspace) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace) + dfD = fabric.list_datasets( + workspace=workspace, + additional_xmla_properties=["CompatibilityLevel", "Model.DefaultMode"], + ) + dfD = dfD[dfD["Dataset Name"] == dataset] + dfD["Compatibility Level"] = dfD["Compatibility Level"].astype(int) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) + dfR["Missing Rows"] = None # Direct Lake if read_stats_from_data: if isDirectLake: - dfC = pd.merge(dfC, dfP[['Table Name', 'Query', 'Source Type']], on='Table Name', how='left') - dfC_flt = dfC[(dfC['Source Type'] == 'Entity') & (~dfC['Column Name'].str.startswith('RowNumber-'))] + dfC = pd.merge( + dfC, + dfP[["Table Name", 
"Query", "Source Type"]], + on="Table Name", + how="left", + ) + dfC_flt = dfC[ + (dfC["Source Type"] == "Entity") + & (~dfC["Column Name"].str.startswith("RowNumber-")) + ] sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) # Get lakehouse name from SQL Endpoint ID - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] if len(dfI_filt) == 0: - print(f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter.") + print( + f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter." + ) else: - lakehouseName = dfI_filt['Display Name'].iloc[0] + lakehouseName = dfI_filt["Display Name"].iloc[0] current_workspace_id = fabric.get_workspace_id() current_workspace = fabric.resolve_workspace_name(current_workspace_id) if current_workspace != lakehouse_workspace: - lakeTables = get_lakehouse_tables(lakehouse = lakehouseName, workspace = lakehouse_workspace) + lakeTables = get_lakehouse_tables( + lakehouse=lakehouseName, workspace=lakehouse_workspace + ) sql_statements = [] spark = SparkSession.builder.getOrCreate() # Loop through tables - for lakeTName in dfC_flt['Query'].unique(): - query = 'SELECT ' - columns_in_table = dfC_flt.loc[dfC_flt['Query'] == lakeTName, 'Source'].unique() - + for lakeTName in dfC_flt["Query"].unique(): + query = "SELECT " + columns_in_table = dfC_flt.loc[ + dfC_flt["Query"] == lakeTName, "Source" + ].unique() + # Loop through columns within those tables for scName in columns_in_table: query = query + f"COUNT(DISTINCT({scName})) AS {scName}, " - + query = query[:-2] if lakehouse_workspace == current_workspace: query = query + f" FROM {lakehouseName}.{lakeTName}" else: - lakeTables_filt = lakeTables[lakeTables['Table Name'] == lakeTName] - tPath = lakeTables_filt['Location'].iloc[0] + lakeTables_filt = lakeTables[ + lakeTables["Table Name"] == lakeTName + ] + tPath = lakeTables_filt["Location"].iloc[0] df = spark.read.format("delta").load(tPath) - tempTableName = 'delta_table_' + lakeTName + tempTableName = "delta_table_" + lakeTName df.createOrReplaceTempView(tempTableName) query = query + f" FROM {tempTableName}" sql_statements.append((lakeTName, query)) @@ -117,364 +148,650 @@ def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Opt query = o[1] df = spark.sql(query) - + for column in df.columns: x = df.collect()[0][column] for i, r in dfC.iterrows(): - if r['Query'] == tName and r['Source'] == column: - dfC.at[i, 'Cardinality'] = x + if r["Query"] == tName and r["Source"] == column: + dfC.at[i, "Cardinality"] = x # Remove column added temporarily - dfC.drop(columns=['Query', 'Source Type'], inplace=True) + dfC.drop(columns=["Query", "Source Type"], inplace=True) # Direct Lake missing rows - dfR = pd.merge(dfR, dfP[['Table Name', 'Query']], left_on = 'From Table', right_on = 'Table Name', how = 'left') - dfR.rename(columns={'Query': 'From Lake Table'}, inplace=True) - dfR.drop(columns=['Table Name'], inplace=True) - dfR = pd.merge(dfR, dfP[['Table Name', 'Query']], left_on = 'To Table', right_on = 'Table Name', how = 'left') - dfR.rename(columns={'Query': 'To Lake Table'}, inplace=True) - 
dfR.drop(columns=['Table Name'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Source']], left_on = 'From Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Source': 'From Lake Column'}, inplace=True) - dfR.drop(columns=['Column Object'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Source']], left_on = 'To Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Source': 'To Lake Column'}, inplace=True) - dfR.drop(columns=['Column Object'], inplace=True) + dfR = pd.merge( + dfR, + dfP[["Table Name", "Query"]], + left_on="From Table", + right_on="Table Name", + how="left", + ) + dfR.rename(columns={"Query": "From Lake Table"}, inplace=True) + dfR.drop(columns=["Table Name"], inplace=True) + dfR = pd.merge( + dfR, + dfP[["Table Name", "Query"]], + left_on="To Table", + right_on="Table Name", + how="left", + ) + dfR.rename(columns={"Query": "To Lake Table"}, inplace=True) + dfR.drop(columns=["Table Name"], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Source"]], + left_on="From Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Source": "From Lake Column"}, inplace=True) + dfR.drop(columns=["Column Object"], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Source"]], + left_on="To Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Source": "To Lake Column"}, inplace=True) + dfR.drop(columns=["Column Object"], inplace=True) spark = SparkSession.builder.getOrCreate() for i, r in dfR.iterrows(): - fromTable = r['From Lake Table'] - fromColumn = r['From Lake Column'] - toTable= r['To Lake Table'] - toColumn = r['To Lake Column'] + fromTable = r["From Lake Table"] + fromColumn = r["From Lake Column"] + toTable = r["To Lake Table"] + toColumn = r["To Lake Column"] if lakehouse_workspace == current_workspace: query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" else: - tempTableFrom = 'delta_table_' + fromTable - tempTableTo = 'delta_table_' + toTable + tempTableFrom = "delta_table_" + fromTable + tempTableTo = "delta_table_" + toTable query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" - - #query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" + + # query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" df = spark.sql(query) missingRows = df.collect()[0][0] - dfR.at[i, 'Missing Rows'] = missingRows - - dfR['Missing Rows'] = dfR['Missing Rows'].astype(int) + dfR.at[i, "Missing Rows"] = missingRows + + dfR["Missing Rows"] = dfR["Missing Rows"].astype(int) else: # Calculate missing rows using DAX for non-direct lake for i, r in dfR.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable= r['To Table'] - toColumn = r['To Column'] - isActive = bool(r['Active']) + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + isActive = bool(r["Active"]) fromObject = format_dax_object_name(fromTable, fromColumn) - toObject= format_dax_object_name(toTable, toColumn) + toObject = format_dax_object_name(toTable, toColumn) 
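            # The DAX assembled below counts "missing rows": rows on the many side of the
            # relationship whose key has no match on the one side (ISBLANK on the related
            # column); for an inactive relationship, USERELATIONSHIP activates it inside
            # CALCULATE. With hypothetical objects 'Sales'[CustomerKey] -> 'Customer'[CustomerKey],
            # the rendered query is roughly:
            #   evaluate
            #   summarizecolumns(
            #   "1",calculate(countrows('Sales'),isblank('Customer'[CustomerKey]))
            #   )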
missingRows = 0 query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)" - if isActive == False: # add userelationship + if isActive == False: # add userelationship query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)" - - result = fabric.evaluate_dax(dataset = dataset, dax_string = query, workspace = workspace) - try: - missingRows = result.iloc[0,0] + result = fabric.evaluate_dax( + dataset=dataset, dax_string=query, workspace=workspace + ) + + try: + missingRows = result.iloc[0, 0] except: pass - - dfR.at[i, 'Missing Rows'] = missingRows - dfR['Missing Rows'] = dfR['Missing Rows'].astype(int) - dfTP = dfP.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Partitions'}, inplace=True) - dfTC = dfC.groupby('Table Name')['Column Name'].count().reset_index() - dfTC.rename(columns={'Column Name': 'Columns'}, inplace=True) + dfR.at[i, "Missing Rows"] = missingRows + dfR["Missing Rows"] = dfR["Missing Rows"].astype(int) + + dfTP = dfP.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Partitions"}, inplace=True) + dfTC = dfC.groupby("Table Name")["Column Name"].count().reset_index() + dfTC.rename(columns={"Column Name": "Columns"}, inplace=True) - total_size = dfC['Total Size'].sum() - table_sizes = dfC.groupby('Table Name')['Total Size'].sum().reset_index() - table_sizes.rename(columns={'Total Size': 'Table Size'}, inplace=True) + total_size = dfC["Total Size"].sum() + table_sizes = dfC.groupby("Table Name")["Total Size"].sum().reset_index() + table_sizes.rename(columns={"Total Size": "Table Size"}, inplace=True) # Columns - dfC_filt = dfC[~dfC['Column Name'].str.startswith('RowNumber-')] - dfC_filt['% DB'] = round((dfC_filt['Total Size'] / total_size) * 100,2) - dfC_filt = pd.merge(dfC_filt, table_sizes, on = 'Table Name', how = 'left') - dfC_filt['% Table'] = round((dfC_filt['Total Size'] / dfC_filt['Table Size']) * 100,2) - columnList = ['Table Name', 'Column Name', 'Type', 'Cardinality', 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size','% Table', '% DB', 'Data Type', 'Encoding', 'Is Resident', 'Temperature', 'Last Accessed'] - - colSize = dfC_filt[columnList].sort_values(by='Total Size', ascending=False) - temp = dfC_filt[columnList].sort_values(by='Temperature', ascending=False) + dfC_filt = dfC[~dfC["Column Name"].str.startswith("RowNumber-")] + dfC_filt["% DB"] = round((dfC_filt["Total Size"] / total_size) * 100, 2) + dfC_filt = pd.merge(dfC_filt, table_sizes, on="Table Name", how="left") + dfC_filt["% Table"] = round( + (dfC_filt["Total Size"] / dfC_filt["Table Size"]) * 100, 2 + ) + columnList = [ + "Table Name", + "Column Name", + "Type", + "Cardinality", + "Total Size", + "Data Size", + "Dictionary Size", + "Hierarchy Size", + "% Table", + "% DB", + "Data Type", + "Encoding", + "Is Resident", + "Temperature", + "Last Accessed", + ] + + colSize = dfC_filt[columnList].sort_values(by="Total Size", ascending=False) + temp = dfC_filt[columnList].sort_values(by="Temperature", ascending=False) colSize.reset_index(drop=True, inplace=True) temp.reset_index(drop=True, inplace=True) export_Col = colSize.copy() - intList = ['Cardinality', 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - pctList = ['% Table', '% DB'] - colSize[intList] = colSize[intList].applymap('{:,}'.format) - temp[intList] = 
temp[intList].applymap('{:,}'.format) - colSize[pctList] = colSize[pctList].applymap('{:.2f}%'.format) - temp[pctList] = temp[pctList].applymap('{:.2f}%'.format) + intList = [ + "Cardinality", + "Total Size", + "Data Size", + "Dictionary Size", + "Hierarchy Size", + ] + pctList = ["% Table", "% DB"] + colSize[intList] = colSize[intList].applymap("{:,}".format) + temp[intList] = temp[intList].applymap("{:,}".format) + colSize[pctList] = colSize[pctList].applymap("{:.2f}%".format) + temp[pctList] = temp[pctList].applymap("{:.2f}%".format) # Tables - intList = ['Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - dfCSum = dfC.groupby(['Table Name'])[intList].sum().reset_index() - dfCSum['% DB'] = round((dfCSum['Total Size'] / total_size) * 100,2) - - dfTable = pd.merge(dfT[['Table Name', 'Type', 'Row Count']], dfCSum, on = 'Table Name', how = 'inner') - dfTable = pd.merge(dfTable,dfTP, on = 'Table Name', how = 'left') - dfTable = pd.merge(dfTable,dfTC, on = 'Table Name', how = 'left') - dfTable = dfTable.drop_duplicates() #Drop duplicates (temporary) - dfTable = dfTable.sort_values(by='Total Size', ascending=False) + intList = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"] + dfCSum = dfC.groupby(["Table Name"])[intList].sum().reset_index() + dfCSum["% DB"] = round((dfCSum["Total Size"] / total_size) * 100, 2) + + dfTable = pd.merge( + dfT[["Table Name", "Type", "Row Count"]], dfCSum, on="Table Name", how="inner" + ) + dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left") + dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left") + dfTable = dfTable.drop_duplicates() # Drop duplicates (temporary) + dfTable = dfTable.sort_values(by="Total Size", ascending=False) dfTable.reset_index(drop=True, inplace=True) export_Table = dfTable.copy() - intList.extend(['Row Count', 'Partitions', 'Columns']) - dfTable[intList] = dfTable[intList].applymap('{:,}'.format) - pctList = ['% DB'] - dfTable[pctList] = dfTable[pctList].applymap('{:.2f}%'.format) - - ## Relationships - #dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Cardinality']], left_on = 'From Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Cardinality': 'Max From Cardinality'}, inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Cardinality']], left_on = 'To Object', right_on = 'Column Object', how='left') - dfR.rename(columns={'Cardinality': 'Max To Cardinality'}, inplace=True) - dfR = dfR[['From Object', 'To Object', 'Multiplicity', 'Used Size', 'Max From Cardinality', 'Max To Cardinality', 'Missing Rows']].sort_values(by='Used Size', ascending=False) + intList.extend(["Row Count", "Partitions", "Columns"]) + dfTable[intList] = dfTable[intList].applymap("{:,}".format) + pctList = ["% DB"] + dfTable[pctList] = dfTable[pctList].applymap("{:.2f}%".format) + + ## Relationships + # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Cardinality"]], + left_on="From Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Cardinality": "Max From Cardinality"}, inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Cardinality"]], + left_on="To Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Cardinality": "Max To Cardinality"}, inplace=True) + dfR = dfR[ + [ + "From Object", + "To Object", + "Multiplicity", + "Used Size", + "Max From Cardinality", + "Max To 
Cardinality", + "Missing Rows", + ] + ].sort_values(by="Used Size", ascending=False) dfR.reset_index(drop=True, inplace=True) export_Rel = dfR.copy() - intList = ['Used Size', 'Max From Cardinality', 'Max To Cardinality', 'Missing Rows'] + intList = [ + "Used Size", + "Max From Cardinality", + "Max To Cardinality", + "Missing Rows", + ] if read_stats_from_data == False: - intList.remove('Missing Rows') - dfR[intList] = dfR[intList].applymap('{:,}'.format) + intList.remove("Missing Rows") + dfR[intList] = dfR[intList].applymap("{:,}".format) ## Partitions - dfP = dfP[['Table Name', 'Partition Name', 'Mode', 'Record Count', 'Segment Count']].sort_values(by='Record Count', ascending=False) #, 'Records per Segment' - dfP['Records per Segment'] = round(dfP['Record Count'] / dfP['Segment Count'],2) # Remove after records per segment is fixed + dfP = dfP[ + ["Table Name", "Partition Name", "Mode", "Record Count", "Segment Count"] + ].sort_values( + by="Record Count", ascending=False + ) # , 'Records per Segment' + dfP["Records per Segment"] = round( + dfP["Record Count"] / dfP["Segment Count"], 2 + ) # Remove after records per segment is fixed dfP.reset_index(drop=True, inplace=True) export_Part = dfP.copy() - intList = ['Record Count', 'Segment Count', 'Records per Segment'] - dfP[intList] = dfP[intList].applymap('{:,}'.format) + intList = ["Record Count", "Segment Count", "Records per Segment"] + dfP[intList] = dfP[intList].applymap("{:,}".format) ## Hierarchies - dfH_filt = dfH[dfH['Level Ordinal'] == 0] - dfH_filt = dfH_filt[['Table Name', 'Hierarchy Name', 'Used Size']].sort_values(by='Used Size', ascending=False) + dfH_filt = dfH[dfH["Level Ordinal"] == 0] + dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values( + by="Used Size", ascending=False + ) dfH_filt.reset_index(drop=True, inplace=True) export_Hier = dfH_filt.copy() - intList = ['Used Size'] - dfH_filt[intList] = dfH_filt[intList].applymap('{:,}'.format) + intList = ["Used Size"] + dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format) ## Model if total_size >= 1000000000: - y = total_size / (1024 ** 3) * 1000000000 + y = total_size / (1024**3) * 1000000000 elif total_size >= 1000000: - y = total_size / (1024 ** 2) * 1000000 + y = total_size / (1024**2) * 1000000 elif total_size >= 1000: y = total_size / (1024) * 1000 y = round(y) tblCount = len(dfT) colCount = len(dfC_filt) - compatLevel = dfD['Compatibility Level'].iloc[0] - defMode = dfD['Model Default Mode'].iloc[0] - - dfModel = pd.DataFrame({'Dataset Name': dataset, 'Total Size': y, 'Table Count': tblCount, 'Column Count': colCount, 'Compatibility Level': compatLevel, 'Default Mode': defMode}, index=[0]) + compatLevel = dfD["Compatibility Level"].iloc[0] + defMode = dfD["Model Default Mode"].iloc[0] + + dfModel = pd.DataFrame( + { + "Dataset Name": dataset, + "Total Size": y, + "Table Count": tblCount, + "Column Count": colCount, + "Compatibility Level": compatLevel, + "Default Mode": defMode, + }, + index=[0], + ) dfModel.reset_index(drop=True, inplace=True) export_Model = dfModel.copy() - intList = ['Total Size', 'Table Count', 'Column Count'] - dfModel[intList] = dfModel[intList].applymap('{:,}'.format) + intList = ["Total Size", "Table Count", "Column Count"] + dfModel[intList] = dfModel[intList].applymap("{:,}".format) dataFrames = { - 'dfModel': dfModel, - 'dfTable': dfTable, - 'dfP': dfP, - 'colSize': colSize, - 'temp': temp, - 'dfR': dfR, - 'dfH_filt': dfH_filt + "dfModel": dfModel, + "dfTable": dfTable, + "dfP": dfP, + "colSize": 
colSize, + "temp": temp, + "dfR": dfR, + "dfH_filt": dfH_filt, } dfs = {} for fileName, df in dataFrames.items(): dfs[fileName] = df - + visualize_vertipaq(dfs) ### Export vertipaq to delta tables in lakehouse - if export in ['table','zip']: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") + if export in ["table", "zip"]: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) return - - if export == 'table': - spark = SparkSession.builder.getOrCreate() - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) - lakeTName = 'vertipaq_analyzer_model' - - lakeT = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeT_filt = lakeT[lakeT['Table Name'] == lakeTName] - - query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" - - if len(lakeT_filt) == 0: - runId = 1 - else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 - - dfMap = { - 'export_Col': ['Columns', export_Col], - 'export_Table': ['Tables', export_Table], - 'export_Part': ['Partitions', export_Part], - 'export_Rel': ['Relationships', export_Rel], - 'export_Hier': ['Hierarchies', export_Hier], - 'export_Model': ['Model', export_Model] + + if export == "table": + spark = SparkSession.builder.getOrCreate() + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) + lakeTName = "vertipaq_analyzer_model" + + lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName] + + query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" + + if len(lakeT_filt) == 0: + runId = 1 + else: + dfSpark = spark.sql(query) + maxRunId = dfSpark.collect()[0][0] + runId = maxRunId + 1 + + dfMap = { + "export_Col": ["Columns", export_Col], + "export_Table": ["Tables", export_Table], + "export_Part": ["Partitions", export_Part], + "export_Rel": ["Relationships", export_Rel], + "export_Hier": ["Hierarchies", export_Hier], + "export_Model": ["Model", export_Model], } - - print(f"Saving Vertipaq Analyzer to delta tables in the lakehouse...\n") - now = datetime.datetime.now() - for key, (obj, df) in dfMap.items(): - df['Timestamp'] = now - df['Workspace Name'] = workspace - df['Dataset Name'] = dataset - df['RunId'] = runId - - colName = 'Workspace Name' - df.insert(0, colName, df.pop(colName)) - colName = 'Dataset Name' - df.insert(1, colName, df.pop(colName)) - - df.columns = df.columns.str.replace(' ', '_') - - delta_table_name = f"VertipaqAnalyzer_{obj}".lower() - spark_df = spark.createDataFrame(df) - spark_df.write.mode('append').format('delta').saveAsTable(delta_table_name) - print(f"\u2022 Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table.") + + print(f"Saving Vertipaq Analyzer to delta tables in the lakehouse...\n") + now = datetime.datetime.now() + for key, (obj, df) in dfMap.items(): + df["Timestamp"] = now + df["Workspace Name"] = workspace + df["Dataset Name"] = dataset + df["RunId"] = runId + + colName = "Workspace Name" + df.insert(0, colName, df.pop(colName)) + colName = "Dataset Name" + df.insert(1, 
colName, df.pop(colName)) + + df.columns = df.columns.str.replace(" ", "_") + + delta_table_name = f"VertipaqAnalyzer_{obj}".lower() + spark_df = spark.createDataFrame(df) + spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name) + print( + f"\u2022 Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table." + ) ### Export vertipaq to zip file within the lakehouse - if export == 'zip': - dataFrames = { - 'dfModel': dfModel, - 'dfTable': dfTable, - 'dfP': dfP, - 'colSize': colSize, - 'temp': temp, - 'dfR': dfR, - 'dfH_filt': dfH_filt - } - - zipFileName = f"{workspace}.{dataset}.zip" - - folderPath = '/lakehouse/default/Files' - subFolderPath = os.path.join(folderPath, 'VertipaqAnalyzer') - ext = '.csv' - if not os.path.exists(subFolderPath): - os.makedirs(subFolderPath, exist_ok=True) - zipFilePath = os.path.join(subFolderPath, zipFileName) - - # Create CSV files based on dataframes - for fileName, df in dataFrames.items(): - filePath = os.path.join(subFolderPath, fileName + ext) - df.to_csv(filePath, index=False) - - # Create a zip file and add CSV files to it - with zipfile.ZipFile(zipFilePath, 'w') as zipf: - for fileName in dataFrames: - filePath = os.path.join(subFolderPath, fileName + ext) - zipf.write(filePath, os.path.basename(filePath)) - - # Clean up: remove the individual CSV files - for fileName, df in dataFrames.items(): - filePath = os.path.join(subFolderPath, fileName) + ext - if os.path.exists(filePath): - os.remove(filePath) - print(f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook.") + if export == "zip": + dataFrames = { + "dfModel": dfModel, + "dfTable": dfTable, + "dfP": dfP, + "colSize": colSize, + "temp": temp, + "dfR": dfR, + "dfH_filt": dfH_filt, + } + + zipFileName = f"{workspace}.{dataset}.zip" + + folderPath = "/lakehouse/default/Files" + subFolderPath = os.path.join(folderPath, "VertipaqAnalyzer") + ext = ".csv" + if not os.path.exists(subFolderPath): + os.makedirs(subFolderPath, exist_ok=True) + zipFilePath = os.path.join(subFolderPath, zipFileName) + + # Create CSV files based on dataframes + for fileName, df in dataFrames.items(): + filePath = os.path.join(subFolderPath, fileName + ext) + df.to_csv(filePath, index=False) + + # Create a zip file and add CSV files to it + with zipfile.ZipFile(zipFilePath, "w") as zipf: + for fileName in dataFrames: + filePath = os.path.join(subFolderPath, fileName + ext) + zipf.write(filePath, os.path.basename(filePath)) + + # Clean up: remove the individual CSV files + for fileName, df in dataFrames.items(): + filePath = os.path.join(subFolderPath, fileName) + ext + if os.path.exists(filePath): + os.remove(filePath) + print( + f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook." 
+ ) + def visualize_vertipaq(dataframes): - + # Tooltips for columns within the visual data = [ - {'ViewName': 'Model', 'ColumnName': 'Dataset Name', 'Tooltip': 'The name of the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Total Size', 'Tooltip': 'The size of the model (in bytes)'}, - {'ViewName': 'Model', 'ColumnName': 'Table Count', 'Tooltip': 'The number of tables in the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Column Count', 'Tooltip': 'The number of columns in the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Compatibility Level', 'Tooltip': 'The compatibility level of the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Default Mode', 'Tooltip': 'The default query mode of the semantic model'}, - {'ViewName': 'Table', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Table', 'ColumnName': 'Type', 'Tooltip': 'The type of table'}, - {'ViewName': 'Table', 'ColumnName': 'Row Count', 'Tooltip': 'The number of rows in the table'}, - {'ViewName': 'Table', 'ColumnName': 'Total Size', 'Tooltip': 'Data Size + Dictionary Size + Hierarchy Size (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': 'Data Size', 'Tooltip': 'The size of the data for all the columns in this table (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': 'Dictionary Size', 'Tooltip': "The size of the column's dictionary for all columns in this table (in bytes)"}, - {'ViewName': 'Table', 'ColumnName': 'Hierarchy Size', 'Tooltip': 'The size of hierarchy structures for all columns in this table (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': '% DB', 'Tooltip': 'The size of the table relative to the size of the semantic model'}, - {'ViewName': 'Table', 'ColumnName': 'Partitions', 'Tooltip': 'The number of partitions in the table'}, - {'ViewName': 'Table', 'ColumnName': 'Columns', 'Tooltip': 'The number of columns in the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Partition Name', 'Tooltip': 'The name of the partition within the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Mode', 'Tooltip': 'The query mode of the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Record Count', 'Tooltip': 'The number of rows in the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Segment Count', 'Tooltip': 'The number of segments within the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Records per Segment', 'Tooltip': 'The number of rows per segment'}, - {'ViewName': 'Column', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Column', 'ColumnName': 'Column Name', 'Tooltip': 'The name of the column'}, - {'ViewName': 'Column', 'ColumnName': 'Type', 'Tooltip': 'The type of column'}, - {'ViewName': 'Column', 'ColumnName': 'Cardinality', 'Tooltip': 'The number of unique rows in the column'}, - {'ViewName': 'Column', 'ColumnName': 'Total Size', 'Tooltip': 'Data Size + Dictionary Size + Hierarchy Size (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': 'Data Size', 'Tooltip': 'The size of the data for the column (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': 'Dictionary Size', 'Tooltip': "The size of the column's dictionary (in bytes)"}, - {'ViewName': 'Column', 'ColumnName': 'Hierarchy Size', 'Tooltip': 'The size of hierarchy structures (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': '% Table', 'Tooltip': 'The size of the column relative to the size of the table'}, - {'ViewName': 'Column', 'ColumnName': 
'% DB', 'Tooltip': 'The size of the column relative to the size of the semantic model'}, - {'ViewName': 'Column', 'ColumnName': 'Data Type', 'Tooltip': 'The data type of the column'}, - {'ViewName': 'Column', 'ColumnName': 'Encoding', 'Tooltip': 'The encoding type for the column'}, - {'ViewName': 'Column', 'ColumnName': 'Is Resident', 'Tooltip': 'Indicates whether the column is in memory or not'}, - {'ViewName': 'Column', 'ColumnName': 'Temperature', 'Tooltip': 'A decimal indicating the frequency and recency of queries against the column'}, - {'ViewName': 'Column', 'ColumnName': 'Last Accessed', 'Tooltip': 'The time the column was last queried'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Hierarchy Name', 'Tooltip': 'The name of the hierarchy'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Used Size', 'Tooltip': 'The size of user hierarchy structures (in bytes)'}, - {'ViewName': 'Relationship', 'ColumnName': 'From Object', 'Tooltip': 'The from table/column in the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'To Object', 'Tooltip': 'The to table/column in the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Multiplicity', 'Tooltip': 'The cardinality on each side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Used Size', 'Tooltip': 'The size of the relationship (in bytes)'}, - {'ViewName': 'Relationship', 'ColumnName': 'Max From Cardinality', 'Tooltip': 'The number of unique values in the column used in the from side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Max To Cardinality', 'Tooltip': 'The number of unique values in the column used in the to side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Missing Rows', 'Tooltip': "The number of rows in the 'from' table which do not map to the key column in the 'to' table"} + { + "ViewName": "Model", + "ColumnName": "Dataset Name", + "Tooltip": "The name of the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Total Size", + "Tooltip": "The size of the model (in bytes)", + }, + { + "ViewName": "Model", + "ColumnName": "Table Count", + "Tooltip": "The number of tables in the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Column Count", + "Tooltip": "The number of columns in the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Compatibility Level", + "Tooltip": "The compatibility level of the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Default Mode", + "Tooltip": "The default query mode of the semantic model", + }, + { + "ViewName": "Table", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + {"ViewName": "Table", "ColumnName": "Type", "Tooltip": "The type of table"}, + { + "ViewName": "Table", + "ColumnName": "Row Count", + "Tooltip": "The number of rows in the table", + }, + { + "ViewName": "Table", + "ColumnName": "Total Size", + "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Data Size", + "Tooltip": "The size of the data for all the columns in this table (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Dictionary Size", + "Tooltip": "The size of the column's dictionary for all columns in this table (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Hierarchy Size", + "Tooltip": "The size of hierarchy structures for all columns in this table (in bytes)", + }, + { + 
"ViewName": "Table", + "ColumnName": "% DB", + "Tooltip": "The size of the table relative to the size of the semantic model", + }, + { + "ViewName": "Table", + "ColumnName": "Partitions", + "Tooltip": "The number of partitions in the table", + }, + { + "ViewName": "Table", + "ColumnName": "Columns", + "Tooltip": "The number of columns in the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Partition Name", + "Tooltip": "The name of the partition within the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Mode", + "Tooltip": "The query mode of the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Record Count", + "Tooltip": "The number of rows in the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Segment Count", + "Tooltip": "The number of segments within the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Records per Segment", + "Tooltip": "The number of rows per segment", + }, + { + "ViewName": "Column", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Column", + "ColumnName": "Column Name", + "Tooltip": "The name of the column", + }, + {"ViewName": "Column", "ColumnName": "Type", "Tooltip": "The type of column"}, + { + "ViewName": "Column", + "ColumnName": "Cardinality", + "Tooltip": "The number of unique rows in the column", + }, + { + "ViewName": "Column", + "ColumnName": "Total Size", + "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Data Size", + "Tooltip": "The size of the data for the column (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Dictionary Size", + "Tooltip": "The size of the column's dictionary (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Hierarchy Size", + "Tooltip": "The size of hierarchy structures (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "% Table", + "Tooltip": "The size of the column relative to the size of the table", + }, + { + "ViewName": "Column", + "ColumnName": "% DB", + "Tooltip": "The size of the column relative to the size of the semantic model", + }, + { + "ViewName": "Column", + "ColumnName": "Data Type", + "Tooltip": "The data type of the column", + }, + { + "ViewName": "Column", + "ColumnName": "Encoding", + "Tooltip": "The encoding type for the column", + }, + { + "ViewName": "Column", + "ColumnName": "Is Resident", + "Tooltip": "Indicates whether the column is in memory or not", + }, + { + "ViewName": "Column", + "ColumnName": "Temperature", + "Tooltip": "A decimal indicating the frequency and recency of queries against the column", + }, + { + "ViewName": "Column", + "ColumnName": "Last Accessed", + "Tooltip": "The time the column was last queried", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Hierarchy Name", + "Tooltip": "The name of the hierarchy", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Used Size", + "Tooltip": "The size of user hierarchy structures (in bytes)", + }, + { + "ViewName": "Relationship", + "ColumnName": "From Object", + "Tooltip": "The from table/column in the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "To Object", + "Tooltip": "The to table/column in the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Multiplicity", + 
"Tooltip": "The cardinality on each side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Used Size", + "Tooltip": "The size of the relationship (in bytes)", + }, + { + "ViewName": "Relationship", + "ColumnName": "Max From Cardinality", + "Tooltip": "The number of unique values in the column used in the from side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Max To Cardinality", + "Tooltip": "The number of unique values in the column used in the to side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Missing Rows", + "Tooltip": "The number of rows in the 'from' table which do not map to the key column in the 'to' table", + }, ] # Create DataFrame tooltipDF = pd.DataFrame(data) - #define the dictionary with {"Tab name":df} + # define the dictionary with {"Tab name":df} df_dict = { - "Model Summary":dataframes['dfModel'], - "Tables":dataframes['dfTable'], - "Partitions": dataframes['dfP'], - "Columns (Total Size)": dataframes['colSize'], - "Columns (Temperature)": dataframes['temp'], - "Relationships": dataframes['dfR'], - "Hierarchies": dataframes['dfH_filt'] - } + "Model Summary": dataframes["dfModel"], + "Tables": dataframes["dfTable"], + "Partitions": dataframes["dfP"], + "Columns (Total Size)": dataframes["colSize"], + "Columns (Temperature)": dataframes["temp"], + "Relationships": dataframes["dfR"], + "Hierarchies": dataframes["dfH_filt"], + } mapping = { - 'Model Summary': 'Model', - 'Tables': 'Table', - 'Partitions': 'Partition', - 'Columns (Total Size)': 'Column', - 'Columns (Temperature)': 'Column', - 'Relationships': 'Relationship', - 'Hierarchies': 'Hierarchy' -} + "Model Summary": "Model", + "Tables": "Table", + "Partitions": "Partition", + "Columns (Total Size)": "Column", + "Columns (Temperature)": "Column", + "Relationships": "Relationship", + "Hierarchies": "Hierarchy", + } # Basic styles for the tabs and tab content styles = """ @@ -505,10 +822,9 @@ def visualize_vertipaq(dataframes): """ - # HTML for tabs tab_html = '
' - content_html = '' + content_html = "" for i, (title, df) in enumerate(df_dict.items()): tab_id = f"tab{i}" tab_html += f'' @@ -519,23 +835,29 @@ def visualize_vertipaq(dataframes): for col in df.columns: tt = None try: - tooltipDF_filt = tooltipDF[(tooltipDF['ViewName'] == vw) & (tooltipDF['ColumnName'] == col)] - tt = tooltipDF_filt['Tooltip'].iloc[0] + tooltipDF_filt = tooltipDF[ + (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col) + ] + tt = tooltipDF_filt["Tooltip"].iloc[0] except: pass - df_html = df_html.replace(f'{col}', f'{col}') - content_html += f'

<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>'
- tab_html += '</div>'
+ df_html = df_html.replace(f"<th>{col}", f'<th title="{tt}">{col}')
+ content_html += (
+ f'<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>
' + ) + tab_html += "" # Display the tabs, tab contents, and run the script display(HTML(styles + tab_html + content_html + script)) # Default to open the first tab - display(HTML("")) + display( + HTML("") + ) + @log def import_vertipaq_analyzer(folder_path: str, file_name: str): - - """ + """ Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse. Parameters @@ -550,22 +872,22 @@ def import_vertipaq_analyzer(folder_path: str, file_name: str): str A visualization of the Vertipaq Analyzer statistics. """ - - pd.options.mode.copy_on_write = True - zipFilePath = os.path.join(folder_path, file_name) - extracted_dir = os.path.join(folder_path, 'extracted_dataframes') + pd.options.mode.copy_on_write = True - with zipfile.ZipFile(zipFilePath, 'r') as zip_ref: - zip_ref.extractall(extracted_dir) + zipFilePath = os.path.join(folder_path, file_name) + extracted_dir = os.path.join(folder_path, "extracted_dataframes") - # Read all CSV files into a dictionary of DataFrames - dfs = {} - for file_name in zip_ref.namelist(): - df = pd.read_csv(extracted_dir + '/' + file_name) - dfs[file_name] = df + with zipfile.ZipFile(zipFilePath, "r") as zip_ref: + zip_ref.extractall(extracted_dir) - visualize_vertipaq(dfs) + # Read all CSV files into a dictionary of DataFrames + dfs = {} + for file_name in zip_ref.namelist(): + df = pd.read_csv(extracted_dir + "/" + file_name) + dfs[file_name] = df + + visualize_vertipaq(dfs) - # Clean up: remove the extracted directory - shutil.rmtree(extracted_dir) \ No newline at end of file + # Clean up: remove the extracted directory + shutil.rmtree(extracted_dir) diff --git a/sempy_labs/WarmCache.py b/sempy_labs/WarmCache.py index b4d340d0..eae67b1b 100644 --- a/sempy_labs/WarmCache.py +++ b/sempy_labs/WarmCache.py @@ -4,20 +4,21 @@ from tqdm.auto import tqdm import numpy as np import time -from .HelperFunctions import format_dax_object_name +from ._helper_functions import format_dax_object_name from .RefreshSemanticModel import refresh_semantic_model from .GetMeasureDependencies import get_measure_dependencies from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' @log -def warm_direct_lake_cache_perspective(dataset: str, perspective: str, add_dependencies: Optional[bool] = False, workspace: Optional[str] = None): - +def warm_direct_lake_cache_perspective( + dataset: str, + perspective: str, + add_dependencies: Optional[bool] = False, + workspace: Optional[str] = None, +): """ Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective. @@ -33,10 +34,10 @@ def warm_direct_lake_cache_perspective(dataset: str, perspective: str, add_depen The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- - + """ if workspace == None: @@ -45,79 +46,109 @@ def warm_direct_lake_cache_perspective(dataset: str, perspective: str, add_depen else: workspace_id = fabric.resolve_workspace_id(workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"{red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. 
This function is specifically for semantic models in Direct Lake mode.") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode." + ) return - - dfPersp = fabric.list_perspectives(dataset = dataset, workspace = workspace) - dfPersp['DAX Object Name'] = format_dax_object_name(dfPersp['Table Name'], dfPersp['Object Name']) - dfPersp_filt = dfPersp[dfPersp['Perspective Name'] == perspective] + + dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace) + dfPersp["DAX Object Name"] = format_dax_object_name( + dfPersp["Table Name"], dfPersp["Object Name"] + ) + dfPersp_filt = dfPersp[dfPersp["Perspective Name"] == perspective] if len(dfPersp_filt) == 0: - print(f"{red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace.") + print( + f"{icons.red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace." + ) return - dfPersp_c = dfPersp_filt[dfPersp_filt['Object Type'] == 'Column'] + dfPersp_c = dfPersp_filt[dfPersp_filt["Object Type"] == "Column"] - column_values = dfPersp_c['DAX Object Name'].tolist() + column_values = dfPersp_c["DAX Object Name"].tolist() if add_dependencies: # Measure dependencies md = get_measure_dependencies(dataset, workspace) - md['Referenced Full Object'] = format_dax_object_name(md['Referenced Table'], md['Referenced Object']) - dfPersp_m = dfPersp_filt[(dfPersp_filt['Object Type'] == 'Measure')] - md_filt = md[(md['Object Name'].isin(dfPersp_m['Object Name'].values)) & (md['Referenced Object Type'] == 'Column')] - measureDep = md_filt['Referenced Full Object'].unique() + md["Referenced Full Object"] = format_dax_object_name( + md["Referenced Table"], md["Referenced Object"] + ) + dfPersp_m = dfPersp_filt[(dfPersp_filt["Object Type"] == "Measure")] + md_filt = md[ + (md["Object Name"].isin(dfPersp_m["Object Name"].values)) + & (md["Referenced Object Type"] == "Column") + ] + measureDep = md_filt["Referenced Full Object"].unique() # Hierarchy dependencies - dfPersp_h = dfPersp_filt[(dfPersp_filt['Object Type'] == 'Hierarchy')] - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfH['Hierarchy Object'] = format_dax_object_name(dfH['Table Name'], dfH['Hierarchy Name']) - dfH['Column Object'] = format_dax_object_name(dfH['Table Name'], dfH['Column Name']) - dfH_filt = dfH[dfH['Hierarchy Object'].isin(dfPersp_h['DAX Object Name'].values)] - hierarchyDep = dfH_filt['Column Object'].unique() + dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")] + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfH["Hierarchy Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Hierarchy Name"] + ) + dfH["Column Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Column Name"] + ) + dfH_filt = dfH[ + dfH["Hierarchy Object"].isin(dfPersp_h["DAX Object Name"].values) + ] + hierarchyDep = dfH_filt["Column Object"].unique() # Relationship dependencies - unique_table_names = dfPersp_filt['Table Name'].unique() - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From 
Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - filtered_dfR = dfR[dfR['From Table'].isin(unique_table_names) & dfR['To Table'].isin(unique_table_names)] - - fromObjects = filtered_dfR['From Object'].unique() - toObjects = filtered_dfR['To Object'].unique() - - merged_list = np.concatenate([column_values, measureDep, hierarchyDep, fromObjects, toObjects]) + unique_table_names = dfPersp_filt["Table Name"].unique() + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + dfR["From Object"] = format_dax_object_name( + dfR["From Table"], dfR["From Column"] + ) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + filtered_dfR = dfR[ + dfR["From Table"].isin(unique_table_names) + & dfR["To Table"].isin(unique_table_names) + ] + + fromObjects = filtered_dfR["From Object"].unique() + toObjects = filtered_dfR["To Object"].unique() + + merged_list = np.concatenate( + [column_values, measureDep, hierarchyDep, fromObjects, toObjects] + ) merged_list_unique = list(set(merged_list)) else: merged_list_unique = column_values - df = pd.DataFrame(merged_list_unique, columns=['DAX Object Name']) - df[['Table Name', 'Column Name']] = df['DAX Object Name'].str.split('[', expand=True) - df['Table Name'] = df['Table Name'].str[1:-1] - df['Column Name'] = df['Column Name'].str[0:-1] + df = pd.DataFrame(merged_list_unique, columns=["DAX Object Name"]) + df[["Table Name", "Column Name"]] = df["DAX Object Name"].str.split( + "[", expand=True + ) + df["Table Name"] = df["Table Name"].str[1:-1] + df["Column Name"] = df["Column Name"].str[0:-1] - tbls = list(set(value.split('[')[0] for value in merged_list_unique)) + tbls = list(set(value.split("[")[0] for value in merged_list_unique)) for tableName in (bar := tqdm(tbls)): - filtered_list = [value for value in merged_list_unique if value.startswith(f"{tableName}[")] + filtered_list = [ + value for value in merged_list_unique if value.startswith(f"{tableName}[") + ] bar.set_description(f"Warming the '{tableName}' table...") - css = ','.join(map(str, filtered_list)) - dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))""" - x = fabric.evaluate_dax(dataset = dataset, dax_string = dax, workspace = workspace) - - print(f"{green_dot} The following columns have been put into memory:") + css = ",".join(map(str, filtered_list)) + dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" "" + x = fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace) + + print(f"{icons.green_dot} The following columns have been put into memory:") - new_column_order = ['Table Name', 'Column Name', 'DAX Object Name'] + new_column_order = ["Table Name", "Column Name", "DAX Object Name"] df = df.reindex(columns=new_column_order) - df = df[['Table Name', 'Column Name']].sort_values(by=['Table Name', 'Column Name'], ascending=True) - + df = df[["Table Name", "Column Name"]].sort_values( + by=["Table Name", "Column Name"], ascending=True + ) + return df + @log def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = None): - """ Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory. @@ -129,11 +160,11 @@ def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = N The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- + Returns ------- - - """ + + """ if workspace == None: workspace_id = fabric.get_workspace_id() @@ -141,35 +172,45 @@ def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = N else: workspace_id = fabric.resolve_workspace_id(workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode.") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode." + ) return - + # Identify columns which are currently in memory (Is Resident = True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended = True) - dfC['DAX Object Name'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC_filtered = dfC[dfC['Is Resident']] + dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True) + dfC["DAX Object Name"] = format_dax_object_name( + dfC["Table Name"], dfC["Column Name"] + ) + dfC_filtered = dfC[dfC["Is Resident"]] if len(dfC_filtered) == 0: - print(f"{yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace.") + print( + f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace." + ) return # Refresh/frame dataset - refresh_semantic_model(dataset = dataset, refresh_type = 'full', workspace = workspace) + refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace) time.sleep(2) - tbls = dfC_filtered['Table Name'].unique() - column_values = dfC_filtered['DAX Object Name'].tolist() + tbls = dfC_filtered["Table Name"].unique() + column_values = dfC_filtered["DAX Object Name"].tolist() # Run basic query to get columns into memory; completed one table at a time (so as not to overload the capacity) for tableName in (bar := tqdm(tbls)): bar.set_description(f"Warming the '{tableName}' table...") - css = ','.join(map(str, column_values)) - dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))""" - x = fabric.evaluate_dax(dataset = dataset, dax_string = dax, workspace = workspace) + css = ",".join(map(str, column_values)) + dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" "" + x = fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace) - print(f"{green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh.") + print( + f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh." 
+ ) - return dfC_filtered[['Table Name', 'Column Name', 'Is Resident', 'Temperature']].sort_values(by=['Table Name', 'Column Name'], ascending=True) + return dfC_filtered[ + ["Table Name", "Column Name", "Is Resident", "Temperature"] + ].sort_values(by=["Table Name", "Column Name"], ascending=True) diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py index bc1d8850..29d98378 100644 --- a/sempy_labs/__init__.py +++ b/sempy_labs/__init__.py @@ -1 +1,27 @@ -from sempy_labs._clear_cache import clear_cache as clear_cache \ No newline at end of file +from sempy_labs._clear_cache import clear_cache as clear_cache +from sempy_labs._create_blank_semantic_model import ( + create_blank_semantic_model as create_blank_semantic_model, +) +from sempy_labs._create_pqt_file import create_pqt_file as create_pqt_file +from sempy_labs._fallback import check_fallback_reason as check_fallback_reason +from sempy_labs._generate_semantic_model import ( + create_semantic_model_from_bim as create_semantic_model_from_bim, + deploy_semantic_model as deploy_semantic_model, +) +from sempy_labs._list_functions import ( + get_object_level_security as get_object_level_security, +) +from sempy_labs._helper_functions import ( + resolve_lakehouse_name as resolve_lakehouse_name, + save_as_delta_table as save_as_delta_table, + generate_embedded_filter as generate_embedded_filter, + get_direct_lake_sql_endpoint as get_direct_lake_sql_endpoint, + resolve_lakehouse_id as resolve_lakehouse_id, + resolve_dataset_name as resolve_dataset_name, + resolve_dataset_id as resolve_dataset_id, + resolve_report_name as resolve_report_name, + resolve_report_id as resolve_report_id, + create_relationship_name as create_relationship_name, + format_dax_object_name as format_dax_object_name, + create_abfss_path as create_abfss_path, +) diff --git a/sempy_labs/_clear_cache.py b/sempy_labs/_clear_cache.py index 1b009444..426f339b 100644 --- a/sempy_labs/_clear_cache.py +++ b/sempy_labs/_clear_cache.py @@ -1,15 +1,11 @@ import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' def clear_cache(dataset: str, workspace: Optional[str] = None): - """ Clears the cache of a semantic model. @@ -21,17 +17,13 @@ def clear_cache(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - datasetID = resolve_dataset_id(dataset = dataset, workspace = workspace) + datasetID = resolve_dataset_id(dataset=dataset, workspace=workspace) xmla = f""" @@ -40,8 +32,8 @@ def clear_cache(dataset: str, workspace: Optional[str] = None): """ - fabric.execute_xmla(dataset = dataset,xmla_command=xmla, workspace = workspace) + fabric.execute_xmla(dataset=dataset, xmla_command=xmla, workspace=workspace) + + outputtext = f"{icons.green_dot} Cache cleared for the '{dataset}' semantic model within the '{workspace}' workspace." - outputtext = f"{green_dot} Cache cleared for the '{dataset}' semantic model within the '{workspace}' workspace." 
- - return outputtext \ No newline at end of file + return outputtext diff --git a/sempy_labs/CreateBlankSemanticModel.py b/sempy_labs/_create_blank_semantic_model.py similarity index 57% rename from sempy_labs/CreateBlankSemanticModel.py rename to sempy_labs/_create_blank_semantic_model.py index 80ada03f..af2b0bb1 100644 --- a/sempy_labs/CreateBlankSemanticModel.py +++ b/sempy_labs/_create_blank_semantic_model.py @@ -1,15 +1,15 @@ import sempy import sempy.fabric as fabric from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' -def create_blank_semantic_model(dataset: str, compatibility_level: Optional[int] = 1605, workspace: Optional[str] = None): - - """ +def create_blank_semantic_model( + dataset: str, + compatibility_level: Optional[int] = 1605, + workspace: Optional[str] = None, +): + """ Creates a new blank semantic model (no tables/columns etc.). Parameters @@ -23,21 +23,17 @@ def create_blank_semantic_model(dataset: str, compatibility_level: Optional[int] The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) - if compatibility_level < 1500: - print(f"{red_dot} Compatiblity level must be at least 1500.") - return + if compatibility_level < 1500: + print(f"{icons.red_dot} Compatiblity level must be at least 1500.") + return - tmsl = f''' + tmsl = f""" {{ "createOrReplace": {{ "object": {{ @@ -53,8 +49,10 @@ def create_blank_semantic_model(dataset: str, compatibility_level: Optional[int] }} }} }} - ''' + """ - fabric.execute_tmsl(script = tmsl, workspace = workspace) + fabric.execute_tmsl(script=tmsl, workspace=workspace) - return print(f"{green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace.") \ No newline at end of file + return print( + f"{icons.green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace." + ) diff --git a/sempy_labs/_create_pqt_file.py b/sempy_labs/_create_pqt_file.py new file mode 100644 index 00000000..63034882 --- /dev/null +++ b/sempy_labs/_create_pqt_file.py @@ -0,0 +1,238 @@ +import sempy.fabric as fabric +import json, os, shutil +import xml.etree.ElementTree as ET +from ._list_functions import list_tables +from sempy_labs.lakehouse import lakehouse_attached +from sempy._utils._log import log +from typing import Optional +import sempy_labs._icons as icons + + +@log +def create_pqt_file( + dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None +): + """ + Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + file_name : str, default=None + The name of the Power Query Template file to be generated. 
+ Defaults to None which resolves to 'PowerQueryTemplate'. + """ + + if file_name is None: + file_name = "PowerQueryTemplate" + + lakeAttach = lakehouse_attached() + + if lakeAttach == False: + print( + f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + folderPath = "/lakehouse/default/Files" + subFolderPath = os.path.join(folderPath, "pqtnewfolder") + os.makedirs(subFolderPath, exist_ok=True) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfT = list_tables(dataset, workspace) + dfE = fabric.list_expressions(dataset=dataset, workspace=workspace) + + # Check if M-partitions are used + if any(dfP["Source Type"] == "M"): + + class QueryMetadata: + def __init__( + self, + QueryName, + QueryGroupId=None, + LastKnownIsParameter=None, + LastKnownResultTypeName=None, + LoadEnabled=True, + IsHidden=False, + ): + self.QueryName = QueryName + self.QueryGroupId = QueryGroupId + self.LastKnownIsParameter = LastKnownIsParameter + self.LastKnownResultTypeName = LastKnownResultTypeName + self.LoadEnabled = LoadEnabled + self.IsHidden = IsHidden + + class RootObject: + def __init__( + self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None + ): + if QueryGroups is None: + QueryGroups = [] + self.DocumentLocale = DocumentLocale + self.EngineVersion = EngineVersion + self.QueriesMetadata = QueriesMetadata + self.QueryGroups = QueryGroups + + # STEP 1: Create MashupDocument.pq + mdfileName = "MashupDocument.pq" + mdFilePath = os.path.join(subFolderPath, mdfileName) + sb = "section Section1;" + for table_name in dfP["Table Name"].unique(): + tName = '#"' + table_name + '"' + sourceExpression = dfT.loc[ + (dfT["Name"] == table_name), "Source Expression" + ].iloc[0] + refreshPolicy = dfT.loc[(dfT["Name"] == table_name), "Refresh Policy"].iloc[ + 0 + ] + sourceType = dfP.loc[(dfP["Table Name"] == table_name), "Source Type"].iloc[ + 0 + ] + + if sourceType == "M" or refreshPolicy: + sb = sb + "\n" + "shared " + tName + " = " + + partitions_in_table = dfP.loc[ + dfP["Table Name"] == table_name, "Partition Name" + ].unique() + + i = 1 + for partition_name in partitions_in_table: + pSourceType = dfP.loc[ + (dfP["Table Name"] == table_name) + & (dfP["Partition Name"] == partition_name), + "Source Type", + ].iloc[0] + pQuery = dfP.loc[ + (dfP["Table Name"] == table_name) + & (dfP["Partition Name"] == partition_name), + "Query", + ].iloc[0] + + if pQuery is not None: + pQueryNoSpaces = ( + pQuery.replace(" ", "") + .replace("\n", "") + .replace("\t", "") + .replace("\r", "") + ) + if pQueryNoSpaces.startswith('letSource=""'): + pQuery = 'let\n\tSource = ""\nin\n\tSource' + + if pSourceType == "M" and i == 1: + sb = sb + pQuery + ";" + elif refreshPolicy and i == 1: + sb = sb + sourceExpression + ";" + i += 1 + + for index, row in dfE.iterrows(): + expr = row["Expression"] + eName = row["Name"] + eName = '#"' + eName + '"' + sb = sb + "\n" + "shared " + eName + " = " + expr + ";" + + with open(mdFilePath, "w") as file: + file.write(sb) + + # STEP 2: Create the MashupMetadata.json file + mmfileName = "MashupMetadata.json" + mmFilePath = os.path.join(subFolderPath, mmfileName) + queryMetadata = [] + + for tName in dfP["Table Name"].unique(): + sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0] + refreshPolicy = 
dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0] + if sourceType == "M" or refreshPolicy: + queryMetadata.append( + QueryMetadata(tName, None, None, None, True, False) + ) + + for i, r in dfE.iterrows(): + eName = r["Name"] + eKind = r["Kind"] + if eKind == "M": + queryMetadata.append( + QueryMetadata(eName, None, None, None, True, False) + ) + else: + queryMetadata.append( + QueryMetadata(eName, None, None, None, False, False) + ) + + rootObject = RootObject("en-US", "2.126.453.0", queryMetadata) + + def obj_to_dict(obj): + if isinstance(obj, list): + return [obj_to_dict(e) for e in obj] + elif hasattr(obj, "__dict__"): + return {k: obj_to_dict(v) for k, v in obj.__dict__.items()} + else: + return obj + + jsonContent = json.dumps(obj_to_dict(rootObject), indent=4) + + with open(mmFilePath, "w") as json_file: + json_file.write(jsonContent) + + # STEP 3: Create Metadata.json file + mFileName = "Metadata.json" + mFilePath = os.path.join(subFolderPath, mFileName) + metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"} + jsonContent = json.dumps(metaData, indent=4) + + with open(mFilePath, "w") as json_file: + json_file.write(jsonContent) + + # STEP 4: Create [Content_Types].xml file: + ns = "http://schemas.openxmlformats.org/package/2006/content-types" + ET.register_namespace("", ns) + types = ET.Element("{%s}Types" % ns) + default1 = ET.SubElement( + types, + "{%s}Default" % ns, + {"Extension": "json", "ContentType": "application/json"}, + ) + default2 = ET.SubElement( + types, + "{%s}Default" % ns, + {"Extension": "pq", "ContentType": "application/x-ms-m"}, + ) + xmlDocument = ET.ElementTree(types) + xmlFileName = "[Content_Types].xml" + xmlFilePath = os.path.join(subFolderPath, xmlFileName) + xmlDocument.write( + xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml" + ) + + # STEP 5: Zip up the 4 files + zipFileName = file_name + ".zip" + zipFilePath = os.path.join(folderPath, zipFileName) + shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath) + + # STEP 6: Convert the zip file back into a .pqt file + newExt = ".pqt" + directory = os.path.dirname(zipFilePath) + fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0] + newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt) + shutil.move(zipFilePath, newFilePath) + + # STEP 7: Delete subFolder directory which is no longer needed + shutil.rmtree(subFolderPath, ignore_errors=True) + + print( + f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse." + ) + + else: + print( + f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated." + ) diff --git a/sempy_labs/Fallback.py b/sempy_labs/_fallback.py similarity index 55% rename from sempy_labs/Fallback.py rename to sempy_labs/_fallback.py index cad5ee80..38886b6a 100644 --- a/sempy_labs/Fallback.py +++ b/sempy_labs/_fallback.py @@ -3,8 +3,8 @@ import numpy as np from typing import List, Optional, Union -def check_fallback_reason(dataset: str, workspace: Optional[str] = None): +def check_fallback_reason(dataset: str, workspace: Optional[str] = None): """ Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery. 
@@ -27,31 +27,36 @@ def check_fallback_reason(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] - + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] + if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models.") + print( + f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models." + ) else: - df = fabric.evaluate_dax(dataset = dataset,workspace = workspace, - dax_string = - """ + df = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TableName] AS [Table Name],[FallbackReason] AS [FallbackReasonID] FROM $SYSTEM.TMSCHEMA_DELTA_TABLE_METADATA_STORAGES - """ - ) + """, + ) value_mapping = { - 0: 'No reason for fallback', - 1: 'This table is not framed', - 2: 'This object is a view in the lakehouse', - 3: 'The table does not exist in the lakehouse', - 4: 'Transient error', - 5: 'Using OLS will result in fallback to DQ', - 6: 'Using RLS will result in fallback to DQ' + 0: "No reason for fallback", + 1: "This table is not framed", + 2: "This object is a view in the lakehouse", + 3: "The table does not exist in the lakehouse", + 4: "Transient error", + 5: "Using OLS will result in fallback to DQ", + 6: "Using RLS will result in fallback to DQ", } # Create a new column based on the mapping - df['Fallback Reason Detail'] = np.vectorize(value_mapping.get)(df['FallbackReasonID']) - - return df \ No newline at end of file + df["Fallback Reason Detail"] = np.vectorize(value_mapping.get)( + df["FallbackReasonID"] + ) + + return df diff --git a/sempy_labs/GenerateSemanticModel.py b/sempy_labs/_generate_semantic_model.py similarity index 56% rename from sempy_labs/GenerateSemanticModel.py rename to sempy_labs/_generate_semantic_model.py index 7ed53dae..fd11a822 100644 --- a/sempy_labs/GenerateSemanticModel.py +++ b/sempy_labs/_generate_semantic_model.py @@ -2,10 +2,12 @@ import sempy.fabric as fabric import json, base64, time from .GetSemanticModelBim import get_semantic_model_bim -from typing import List, Optional, Union +from typing import Optional -def create_semantic_model_from_bim(dataset: str, bim_file: str, workspace: Optional[str] = None): +def create_semantic_model_from_bim( + dataset: str, bim_file: str, workspace: Optional[str] = None +): """ Creates a new semantic model based on a Model.bim file. @@ -19,10 +21,6 @@ def create_semantic_model_from_bim(dataset: str, bim_file: str, workspace: Optio The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ if workspace == None: @@ -31,70 +29,77 @@ def create_semantic_model_from_bim(dataset: str, bim_file: str, workspace: Optio else: workspace_id = fabric.resolve_workspace_id(workspace) - objectType = 'SemanticModel' + objectType = "SemanticModel" - dfI = fabric.list_items(workspace = workspace, type = objectType) - dfI_filt = dfI[(dfI['Display Name'] == dataset)] + dfI = fabric.list_items(workspace=workspace, type=objectType) + dfI_filt = dfI[(dfI["Display Name"] == dataset)] if len(dfI_filt) > 0: - print(f"WARNING: '{dataset}' already exists as a semantic model in the '{workspace}' workspace.") + print( + f"WARNING: '{dataset}' already exists as a semantic model in the '{workspace}' workspace." + ) return client = fabric.FabricRestClient() - defPBIDataset = { - "version": "1.0", - "settings": {} - } + defPBIDataset = {"version": "1.0", "settings": {}} def conv_b64(file): - + loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + return f payloadPBIDefinition = conv_b64(defPBIDataset) payloadBim = conv_b64(bim_file) request_body = { - 'displayName': dataset, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "model.bim", - "payload": payloadBim, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbidataset", - "payload": payloadPBIDefinition, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/items",json=request_body) + "displayName": dataset, + "type": objectType, + "definition": { + "parts": [ + { + "path": "model.bim", + "payload": payloadBim, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbidataset", + "payload": payloadPBIDefinition, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post(f"/v1/workspaces/{workspace_id}/items", json=request_body) if response.status_code == 201: - print(f"The '{dataset}' semantic model has been created within the '{workspace}' workspace.") + print( + f"The '{dataset}' semantic model has been created within the '{workspace}' workspace." + ) print(response.json()) elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] + operationId = response.headers["x-ms-operation-id"] response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": time.sleep(3) response = client.get(f"/v1/operations/{operationId}") response_body = json.loads(response.content) response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{dataset}' semantic model has been created within the '{workspace}' workspace.") + print( + f"The '{dataset}' semantic model has been created within the '{workspace}' workspace." + ) print(response.json()) -def deploy_semantic_model(dataset: str, new_dataset: Optional[str] = None, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): +def deploy_semantic_model( + dataset: str, + new_dataset: Optional[str] = None, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): """ Deploys a semantic model based on an existing semantic model. 
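# Illustrative usage sketch with placeholder names: deploy_semantic_model pulls the
# source model's .bim (via get_semantic_model_bim) and recreates it with
# create_semantic_model_from_bim, so the target name and/or workspace must differ
# from the source.
import sempy_labs as labs

labs.deploy_semantic_model(
    dataset="Sales Model",             # source semantic model (placeholder)
    new_dataset="Sales Model - Test",  # target semantic model (placeholder)
    workspace="Dev",                   # source workspace (placeholder)
    new_dataset_workspace="Test",      # target workspace (placeholder)
)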
@@ -115,8 +120,8 @@ def deploy_semantic_model(dataset: str, new_dataset: Optional[str] = None, works Returns ------- - - """ + + """ if workspace == None: workspace_id = fabric.get_workspace_id() @@ -129,9 +134,13 @@ def deploy_semantic_model(dataset: str, new_dataset: Optional[str] = None, works new_dataset = dataset if new_dataset == dataset and new_dataset_workspace == workspace: - print(f"The 'dataset' and 'new_dataset' parameters have the same value. And, the 'workspace' and 'new_dataset_workspace' parameters have the same value. At least one of these must be different. Please update the parameters.") + print( + f"The 'dataset' and 'new_dataset' parameters have the same value. And, the 'workspace' and 'new_dataset_workspace' parameters have the same value. At least one of these must be different. Please update the parameters." + ) return - bim = get_semantic_model_bim(dataset = dataset, workspace = workspace) + bim = get_semantic_model_bim(dataset=dataset, workspace=workspace) - create_semantic_model_from_bim(dataset = new_dataset, bim_file = bim, workspace = new_dataset_workspace) \ No newline at end of file + create_semantic_model_from_bim( + dataset=new_dataset, bim_file=bim, workspace=new_dataset_workspace + ) diff --git a/sempy_labs/HelperFunctions.py b/sempy_labs/_helper_functions.py similarity index 64% rename from sempy_labs/HelperFunctions.py rename to sempy_labs/_helper_functions.py index d6a8ebf1..46a36f18 100644 --- a/sempy_labs/HelperFunctions.py +++ b/sempy_labs/_helper_functions.py @@ -1,17 +1,14 @@ -import sempy import sempy.fabric as fabric import re from pyspark.sql import SparkSession -from typing import List, Optional, Union +from typing import Optional from uuid import UUID +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def create_abfss_path(lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str): +def create_abfss_path( + lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str +): """ Creates an abfss path for a delta table in a Fabric lakehouse. @@ -32,8 +29,8 @@ def create_abfss_path(lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_ta return f"abfss://{lakehouse_workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{delta_table_name}" -def format_dax_object_name(a: str,b: str): +def format_dax_object_name(a: str, b: str): """ Formats a table/column combination to the 'Table Name'[Column Name] format. @@ -49,11 +46,13 @@ def format_dax_object_name(a: str,b: str): str The fully qualified object name. """ - + return "'" + a + "'[" + b + "]" -def create_relationship_name(from_table: str, from_column: str, to_table: str, to_column: str): +def create_relationship_name( + from_table: str, from_column: str, to_table: str, to_column: str +): """ Formats a relationship's table/columns into a fully qualified name. @@ -71,13 +70,17 @@ def create_relationship_name(from_table: str, from_column: str, to_table: str, t Returns ------- str - The fully qualified relationship name. + The fully qualified relationship name. """ - return format_dax_object_name(from_table, from_column) + ' -> ' + format_dax_object_name(to_table, to_column) + return ( + format_dax_object_name(from_table, from_column) + + " -> " + + format_dax_object_name(to_table, to_column) + ) -def resolve_report_id(report: str, workspace: Optional[str] = None): +def resolve_report_id(report: str, workspace: Optional[str] = None): """ Obtains the ID of the Power BI report. 
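# Illustrative sketch of the naming helpers above; the table/column names are
# placeholders. Expected output is shown in the trailing comments.
from sempy_labs import format_dax_object_name, create_relationship_name

print(format_dax_object_name("Sales", "Order Date"))
# 'Sales'[Order Date]
print(create_relationship_name("Sales", "DateKey", "Date", "DateKey"))
# 'Sales'[DateKey] -> 'Date'[DateKey]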
@@ -95,22 +98,22 @@ def resolve_report_id(report: str, workspace: Optional[str] = None): UUID The ID of the Power BI report. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = report, type = 'Report', workspace = workspace) + obj = fabric.resolve_item_id(item_name=report, type="Report", workspace=workspace) - #objectType = 'Report' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == report)] - #obj = dfI_filt['Id'].iloc[0] + # objectType = 'Report' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Display Name'] == report)] + # obj = dfI_filt['Id'].iloc[0] return obj -def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): +def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the Power BI report. @@ -128,23 +131,24 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): str The name of the Power BI report. """ - - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = report_id, type = 'Report', workspace = workspace) + obj = fabric.resolve_item_name( + item_id=report_id, type="Report", workspace=workspace + ) - #objectType = 'Report' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == report_id)] - #obj = dfI_filt['Display Name'].iloc[0] + # objectType = 'Report' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Id'] == report_id)] + # obj = dfI_filt['Display Name'].iloc[0] return obj -def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): +def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): """ Obtains the ID of the semantic model. @@ -162,22 +166,24 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): UUID The ID of the semantic model. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = dataset, type = 'SemanticModel', workspace = workspace) + obj = fabric.resolve_item_id( + item_name=dataset, type="SemanticModel", workspace=workspace + ) - #objectType = 'SemanticModel' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == dataset)] - #obj = dfI_filt['Id'].iloc[0] + # objectType = 'SemanticModel' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Display Name'] == dataset)] + # obj = dfI_filt['Id'].iloc[0] return obj -def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): +def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the semantic model. @@ -195,22 +201,24 @@ def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): str The name of the semantic model. 
""" - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = dataset_id, type = 'SemanticModel', workspace = workspace) + obj = fabric.resolve_item_name( + item_id=dataset_id, type="SemanticModel", workspace=workspace + ) - #objectType = 'SemanticModel' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == dataset_id)] - #obj = dfI_filt['Display Name'].iloc[0] + # objectType = 'SemanticModel' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Id'] == dataset_id)] + # obj = dfI_filt['Display Name'].iloc[0] return obj -def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): +def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the Fabric lakehouse. @@ -228,27 +236,29 @@ def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): str The name of the Fabric lakehouse. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = lakehouse_id, type = 'Lakehouse', workspace = workspace) + obj = fabric.resolve_item_name( + item_id=lakehouse_id, type="Lakehouse", workspace=workspace + ) - #objectType = 'Lakehouse' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == lakehouse_id)] + # objectType = 'Lakehouse' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Id'] == lakehouse_id)] - #if len(dfI_filt) == 0: + # if len(dfI_filt) == 0: # print(f"The '{lakehouse_id}' Lakehouse Id does not exist within the '{workspace}' workspace.") # return - - #obj = dfI_filt['Display Name'].iloc[0] + + # obj = dfI_filt['Display Name'].iloc[0] return obj -def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): +def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): """ Obtains the ID of the Fabric lakehouse. @@ -266,27 +276,29 @@ def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): UUID The ID of the Fabric lakehouse. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = lakehouse, type = 'Lakehouse', workspace = workspace) + obj = fabric.resolve_item_id( + item_name=lakehouse, type="Lakehouse", workspace=workspace + ) - #objectType = 'Lakehouse' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] + # objectType = 'Lakehouse' + # dfI = fabric.list_items(workspace = workspace, type = objectType) + # dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] - #if len(dfI_filt) == 0: + # if len(dfI_filt) == 0: # print(f"The '{lakehouse}' lakehouse does not exist within the '{workspace}' workspace.") # return - - #obj = dfI_filt['Id'].iloc[0] + + # obj = dfI_filt['Id'].iloc[0] return obj -def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): +def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): """ Obtains the SQL Endpoint ID of the semantic model. 
@@ -309,24 +321,26 @@ def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode.") + print( + f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode." + ) return - - dfE = fabric.list_expressions(dataset = dataset, workspace = workspace) - dfE_filt = dfE[dfE['Name']== 'DatabaseQuery'] - expr = dfE_filt['Expression'].iloc[0] + + dfE = fabric.list_expressions(dataset=dataset, workspace=workspace) + dfE_filt = dfE[dfE["Name"] == "DatabaseQuery"] + expr = dfE_filt["Expression"].iloc[0] matches = re.findall(r'"([^"]*)"', expr) sqlEndpointId = matches[1] - + return sqlEndpointId -def generate_embedded_filter(filter: str): +def generate_embedded_filter(filter: str): """ Converts the filter expression to a filter expression which can be used by a Power BI embedded URL. @@ -344,27 +358,60 @@ def generate_embedded_filter(filter: str): pattern = r"'[^']+'\[[^\[]+\]" matches = re.findall(pattern, filter) for match in matches: - matchReplace = match.replace("'",'').replace('[','/').replace(']','')\ - .replace(' ','_x0020_').replace('@','_00x40_').replace('+','_0x2B_').replace('{','_007B_').replace('}','_007D_') + matchReplace = ( + match.replace("'", "") + .replace("[", "/") + .replace("]", "") + .replace(" ", "_x0020_") + .replace("@", "_00x40_") + .replace("+", "_0x2B_") + .replace("{", "_007B_") + .replace("}", "_007D_") + ) filter = filter.replace(match, matchReplace) - + pattern = r"\[[^\[]+\]" matches = re.findall(pattern, filter) for match in matches: - matchReplace = match.replace("'",'').replace('[','/').replace(']','')\ - .replace(' ','_x0020_').replace('@','_00x40_').replace('+','_0x2B_').replace('{','_007B_').replace('}','_007D_') + matchReplace = ( + match.replace("'", "") + .replace("[", "/") + .replace("]", "") + .replace(" ", "_x0020_") + .replace("@", "_00x40_") + .replace("+", "_0x2B_") + .replace("{", "_007B_") + .replace("}", "_007D_") + ) filter = filter.replace(match, matchReplace) - revised_filter = filter.replace('<=','le').replace('>=','ge').replace('<>','ne').replace('!=','ne')\ - .replace('==','eq').replace('=','eq').replace('<','lt').replace('>','gt')\ - .replace(' && ',' and ').replace(' & ',' and ')\ - .replace(' || ',' or ').replace(' | ',' or ')\ - .replace('{','(').replace('}',')') - + revised_filter = ( + filter.replace("<=", "le") + .replace(">=", "ge") + .replace("<>", "ne") + .replace("!=", "ne") + .replace("==", "eq") + .replace("=", "eq") + .replace("<", "lt") + .replace(">", "gt") + .replace(" && ", " and ") + .replace(" & ", " and ") + .replace(" || ", " or ") + .replace(" | ", " or ") + .replace("{", "(") + .replace("}", ")") + ) + return revised_filter -def save_as_delta_table(dataframe, delta_table_name: str, write_mode: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def save_as_delta_table( + dataframe, + delta_table_name: str, + write_mode: str, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): """ Saves a pandas dataframe as a delta table in a Fabric lakehouse. 
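To make the URL-encoding behaviour of generate_embedded_filter above concrete, a small sketch; the filter string is a made-up example and the expected output is inferred from the replacement rules above:

from sempy_labs._helper_functions import generate_embedded_filter

# Expected to yield roughly: Date/Calendar_x0020_Year eq 2024
print(generate_embedded_filter("'Date'[Calendar Year] = 2024"))
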
@@ -398,26 +445,38 @@ def save_as_delta_table(dataframe, delta_table_name: str, write_mode: str, lakeh if lakehouse is None: lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id=lakehouse_id, workspace=workspace) + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) else: lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - writeModes = ['append', 'overwrite'] + writeModes = ["append", "overwrite"] write_mode = write_mode.lower() if write_mode not in writeModes: - print(f"{red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {writeModes}.") + print( + f"{icons.red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {writeModes}." + ) return - if ' ' in delta_table_name: - print(f"{red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names.") + if " " in delta_table_name: + print( + f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names." + ) return - - dataframe.columns = dataframe.columns.str.replace(' ', '_') + + dataframe.columns = dataframe.columns.str.replace(" ", "_") spark = SparkSession.builder.getOrCreate() spark_df = spark.createDataFrame(dataframe) - filePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = workspace_id, delta_table_name = delta_table_name) - spark_df.write.mode(write_mode).format('delta').save(filePath) - print(f"{green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace.") \ No newline at end of file + filePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=workspace_id, + delta_table_name=delta_table_name, + ) + spark_df.write.mode(write_mode).format("delta").save(filePath) + print( + f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace." + ) diff --git a/sempy_labs/_icons.py b/sempy_labs/_icons.py new file mode 100644 index 00000000..2547eb5f --- /dev/null +++ b/sempy_labs/_icons.py @@ -0,0 +1,4 @@ +green_dot = "\U0001F7E2" +yellow_dot = "\U0001F7E1" +red_dot = "\U0001F534" +in_progress = "⌛" diff --git a/sempy_labs/ListFunctions.py b/sempy_labs/_list_functions.py similarity index 55% rename from sempy_labs/ListFunctions.py rename to sempy_labs/_list_functions.py index 6e63a69a..f2fe7c2f 100644 --- a/sempy_labs/ListFunctions.py +++ b/sempy_labs/_list_functions.py @@ -1,13 +1,11 @@ -import sempy import sempy.fabric as fabric import pandas as pd import json, time from pyspark.sql import SparkSession -from .GetDirectLakeLakehouse import get_direct_lake_lakehouse from typing import List, Optional, Union -def get_object_level_security(dataset: str, workspace: Optional[str] = None): +def get_object_level_security(dataset: str, workspace: Optional[str] = None): """ Shows the object level security for the semantic model. @@ -19,7 +17,7 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
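A minimal sketch of writing a pandas dataframe with save_as_delta_table above, assuming the notebook has a default lakehouse attached; the dataframe and table name are placeholders:

import pandas as pd
from sempy_labs._helper_functions import save_as_delta_table

df = pd.DataFrame({"Region": ["East", "West"], "Sales Amount": [100, 200]})

# Column names have spaces replaced with underscores before writing;
# the delta table name itself must not contain spaces.
save_as_delta_table(
    dataframe=df, delta_table_name="regional_sales", write_mode="overwrite"
)
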
- + Returns ------- pandas.DataFrame @@ -33,7 +31,7 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Role Name', 'Object Type', 'Table Name', 'Object Name']) + df = pd.DataFrame(columns=["Role Name", "Object Type", "Table Name", "Object Name"]) for r in m.Roles: for tp in r.TablePermissions: @@ -41,18 +39,32 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): columnCount = len(tp.ColumnPermissions) objectType = "Table" if columnCount == 0: - new_data = {'Role Name': r.Name, 'Object Type': objectType, 'Table Name': tp.Name, 'Object Name': tp.Name} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Role Name": r.Name, + "Object Type": objectType, + "Table Name": tp.Name, + "Object Name": tp.Name, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) else: objectType = "Column" for cp in tp.ColumnPermissions: - new_data = {'Role Name': r.Name, 'Object Type': objectType, 'Table Name': tp.Name, 'Object Name': cp.Name} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Role Name": r.Name, + "Object Type": objectType, + "Table Name": tp.Name, + "Object Name": cp.Name, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df -def list_tables(dataset: str, workspace: Optional[str] = None): +def list_tables(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's tables and their properties. @@ -64,7 +76,7 @@ def list_tables(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame @@ -78,7 +90,17 @@ def list_tables(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Name', 'Type', 'Hidden', 'Data Category', 'Description', 'Refresh Policy', 'Source Expression']) + df = pd.DataFrame( + columns=[ + "Name", + "Type", + "Hidden", + "Data Category", + "Description", + "Refresh Policy", + "Source Expression", + ] + ) for t in m.Tables: tableType = "Table" @@ -94,13 +116,21 @@ def list_tables(dataset: str, workspace: Optional[str] = None): if rPolicy: sourceExpression = t.RefreshPolicy.SourceExpression - new_data = {'Name': t.Name, 'Type': tableType, 'Hidden': t.IsHidden, 'Data Category': t.DataCategory, 'Description': t.Description, 'Refresh Policy': rPolicy, 'Source Expression': sourceExpression} + new_data = { + "Name": t.Name, + "Type": tableType, + "Hidden": t.IsHidden, + "Data Category": t.DataCategory, + "Description": t.Description, + "Refresh Policy": rPolicy, + "Source Expression": sourceExpression, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_annotations(dataset: str, workspace: Optional[str] = None): +def list_annotations(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's annotations and their properties. @@ -112,7 +142,7 @@ def list_annotations(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame @@ -126,108 +156,201 @@ def list_annotations(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Object Name', 'Parent Object Name', 'Object Type', 'Annotation Name', 'Annotation Value']) + df = pd.DataFrame( + columns=[ + "Object Name", + "Parent Object Name", + "Object Type", + "Annotation Name", + "Annotation Value", + ] + ) mName = m.Name - for a in m.Annotations: - objectType = 'Model' + for a in m.Annotations: + objectType = "Model" aName = a.Name aValue = a.Value - new_data = {'Object Name': mName, 'Parent Object Name': "N/A", 'Object Type': objectType,'Annotation Name': aName, 'Annotation Value': aValue} + new_data = { + "Object Name": mName, + "Parent Object Name": "N/A", + "Object Type": objectType, + "Annotation Name": aName, + "Annotation Value": aValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for t in m.Tables: - objectType = 'Table' + objectType = "Table" tName = t.Name for ta in t.Annotations: taName = ta.Name taValue = ta.Value - new_data = {'Object Name': tName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': taName, 'Annotation Value': taValue} + new_data = { + "Object Name": tName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": taName, + "Annotation Value": taValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for p in t.Partitions: pName = p.Name - objectType = 'Partition' + objectType = "Partition" for pa in p.Annotations: paName = paName paValue = paValue - new_data = {'Object Name': pName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': paName, 'Annotation Value': paValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": pName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": paName, + "Annotation Value": paValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for c in t.Columns: - objectType = 'Column' - cName = c.Name + objectType = "Column" + cName = c.Name for ca in c.Annotations: caName = ca.Name caValue = ca.Value - new_data = {'Object Name': cName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': caName, 'Annotation Value': caValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": cName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": caName, + "Annotation Value": caValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for ms in t.Measures: - objectType = 'Measure' + objectType = "Measure" measName = ms.Name for ma in ms.Annotations: maName = ma.Name maValue = ma.Value - new_data = {'Object Name': measName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': maName, 'Annotation Value': maValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": measName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": maName, + "Annotation Value": maValue, + } + df = pd.concat( + [df, 
pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for h in t.Hierarchies: - objectType = 'Hierarchy' + objectType = "Hierarchy" hName = h.Name for ha in h.Annotations: haName = ha.Name haValue = ha.Value - new_data = {'Object Name': hName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': haName, 'Annotation Value': haValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": hName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": haName, + "Annotation Value": haValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for d in m.DataSources: dName = d.Name - objectType = 'Data Source' + objectType = "Data Source" for da in d.Annotations: daName = da.Name daValue = da.Value - new_data = {'Object Name': dName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': daName, 'Annotation Value': daValue} + new_data = { + "Object Name": dName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": daName, + "Annotation Value": daValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for r in m.Relationships: rName = r.Name - objectType = 'Relationship' + objectType = "Relationship" for ra in r.Annotations: raName = ra.Name raValue = ra.Value - new_data = {'Object Name': rName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': raName, 'Annotation Value': raValue} + new_data = { + "Object Name": rName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": raName, + "Annotation Value": raValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for cul in m.Cultures: culName = cul.Name - objectType = 'Translation' + objectType = "Translation" for cula in cul.Annotations: culaName = cula.Name culaValue = cula.Value - new_data = {'Object Name': culName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': culaName, 'Annotation Value': culaValue} + new_data = { + "Object Name": culName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": culaName, + "Annotation Value": culaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for e in m.Expressions: eName = e.Name - objectType = 'Expression' + objectType = "Expression" for ea in e.Annotations: eaName = ea.Name eaValue = ea.Value - new_data = {'Object Name': eName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': eaName, 'Annotation Value': eaValue} + new_data = { + "Object Name": eName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": eaName, + "Annotation Value": eaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for per in m.Perspectives: perName = per.Name - objectType = 'Perspective' + objectType = "Perspective" for pera in per.Annotations: peraName = pera.Name peraValue = pera.Value - new_data = {'Object Name': perName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': peraName, 'Annotation Value': peraValue} + new_data = { + "Object Name": perName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": peraName, + "Annotation Value": peraValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for rol in m.Roles: rolName = rol.Name - objectType = 'Role' + objectType = "Role" for rola in 
rol.Annotations: rolaName = rola.Name rolaValue = rola.Value - new_data = {'Object Name': rolName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': rolaName, 'Annotation Value': rolaValue} + new_data = { + "Object Name": rolName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": rolaName, + "Annotation Value": rolaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def list_columns( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Shows a semantic model's columns and their properties. @@ -246,42 +369,60 @@ def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optio The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the semantic model's columns and their properties. """ + from sempy_labs.directlake._get_directlake_lakehouse import ( + get_direct_lake_lakehouse, + ) if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) if isDirectLake: - dfC['Column Cardinality'] = None + dfC["Column Cardinality"] = None sql_statements = [] - lakeID, lakeName = get_direct_lake_lakehouse(dataset = dataset, workspace = workspace, lakehouse = lakehouse, lakehouse_workspace = lakehouse_workspace) - - for table_name in dfC['Table Name'].unique(): + lakeID, lakeName = get_direct_lake_lakehouse( + dataset=dataset, + workspace=workspace, + lakehouse=lakehouse, + lakehouse_workspace=lakehouse_workspace, + ) + + for table_name in dfC["Table Name"].unique(): print(f"Gathering stats for table: '{table_name}'...") - query = 'SELECT ' - - columns_in_table = dfC.loc[dfC['Table Name'] == table_name, 'Column Name'].unique() - + query = "SELECT " + + columns_in_table = dfC.loc[ + dfC["Table Name"] == table_name, "Column Name" + ].unique() + # Loop through columns within those tables for column_name in columns_in_table: - scName = dfC.loc[(dfC['Table Name'] == table_name) & (dfC['Column Name'] == column_name), 'Source'].iloc[0] - lakeTName = dfC.loc[(dfC['Table Name'] == table_name) & (dfC['Column Name'] == column_name), 'Query'].iloc[0] + scName = dfC.loc[ + (dfC["Table Name"] == table_name) + & (dfC["Column Name"] == column_name), + "Source", + ].iloc[0] + lakeTName = dfC.loc[ + (dfC["Table Name"] == table_name) + & (dfC["Column Name"] == column_name), + "Query", + ].iloc[0] # Build the query to be executed dynamically query = query + f"COUNT(DISTINCT({scName})) AS {scName}, " - + query = query[:-2] query = query + f" FROM {lakehouse}.{lakeTName}" sql_statements.append((table_name, query)) @@ -294,20 +435,20 @@ def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optio # 
Run the query df = spark.sql(query) - + for column in df.columns: x = df.collect()[0][column] for i, r in dfC.iterrows(): - if r['Table Name'] == tName and r['Source'] == column: - dfC.at[i, 'Column Cardinality'] = x + if r["Table Name"] == tName and r["Source"] == column: + dfC.at[i, "Column Cardinality"] = x # Remove column added temporarily - dfC.drop(columns=['Query'], inplace=True) + dfC.drop(columns=["Query"], inplace=True) return dfC -def list_dashboards(workspace: Optional[str] = None): +def list_dashboards(workspace: Optional[str] = None): """ Shows a list of the dashboards within a workspace. @@ -317,16 +458,27 @@ def list_dashboards(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the dashboards within a workspace. """ - df = pd.DataFrame(columns=['Dashboard ID', 'Dashboard Name', 'Read Only', 'Web URL', 'Embed URL', 'Data Classification', 'Users', 'Subscriptions']) - - if workspace == 'None': + df = pd.DataFrame( + columns=[ + "Dashboard ID", + "Dashboard Name", + "Read Only", + "Web URL", + "Embed URL", + "Data Classification", + "Users", + "Subscriptions", + ] + ) + + if workspace == "None": workspace_id = fabric.get_workspace_id() workspace = fabric.resovle_workspace_name(workspace_id) else: @@ -335,25 +487,34 @@ def list_dashboards(workspace: Optional[str] = None): client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/groups/{workspace_id}/dashboards") - for v in response.json()['value']: - dashboardID = v['id'] - displayName = v['displayName'] - isReadOnly = v['isReadOnly'] - webURL = v['webUrl'] - embedURL = v['embedUrl'] - dataClass = v['dataClassification'] - users = v['users'] - subs = v['subscriptions'] - - new_data = {'Dashboard ID': dashboardID, 'Dashboard Name': displayName, 'Read Only': isReadOnly, 'Web URL': webURL, 'Embed URL': embedURL, 'Data Classification': dataClass, 'Users': [users], 'Subscriptions': [subs]} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for v in response.json()["value"]: + dashboardID = v["id"] + displayName = v["displayName"] + isReadOnly = v["isReadOnly"] + webURL = v["webUrl"] + embedURL = v["embedUrl"] + dataClass = v["dataClassification"] + users = v["users"] + subs = v["subscriptions"] + + new_data = { + "Dashboard ID": dashboardID, + "Dashboard Name": displayName, + "Read Only": isReadOnly, + "Web URL": webURL, + "Embed URL": embedURL, + "Data Classification": dataClass, + "Users": [users], + "Subscriptions": [subs], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Read Only'] = df['Read Only'].astype(bool) + df["Read Only"] = df["Read Only"].astype(bool) return df -def list_lakehouses(workspace: Optional[str] = None): +def list_lakehouses(workspace: Optional[str] = None): """ Shows the lakehouses within a workspace. @@ -363,14 +524,25 @@ def list_lakehouses(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the lakehouses within a workspace. 
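The Direct Lake branch of list_columns above estimates 'Column Cardinality' by running one Spark SQL query per lakehouse table, counting distinct values for each mapped source column. A stripped-down sketch of that idea; the lakehouse, table and column names are placeholders:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# One COUNT(DISTINCT(...)) per source column, issued per table
query = (
    "SELECT COUNT(DISTINCT(SalesOrderNumber)) AS SalesOrderNumber "
    "FROM MyLakehouse.factinternetsales"
)
cardinalities = spark.sql(query).collect()[0].asDict()
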
""" - df = pd.DataFrame(columns=['Lakehouse Name', 'Lakehouse ID', 'Description', 'OneLake Tables Path', 'OneLake Files Path', 'SQL Endpoint Connection String', 'SQL Endpoint ID', 'SQL Endpoint Provisioning Status']) + df = pd.DataFrame( + columns=[ + "Lakehouse Name", + "Lakehouse ID", + "Description", + "OneLake Tables Path", + "OneLake Files Path", + "SQL Endpoint Connection String", + "SQL Endpoint ID", + "SQL Endpoint Provisioning Status", + ] + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -380,26 +552,35 @@ def list_lakehouses(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/") - - for v in response.json()['value']: - lakehouseId = v['id'] - lakehouseName = v['displayName'] - lakehouseDesc = v['description'] - prop = v['properties'] - oneLakeTP = prop['oneLakeTablesPath'] - oneLakeFP = prop['oneLakeFilesPath'] - sqlEPProp = prop['sqlEndpointProperties'] - sqlEPCS = sqlEPProp['connectionString'] - sqlepid = sqlEPProp['id'] - sqlepstatus = sqlEPProp['provisioningStatus'] - - new_data = {'Lakehouse Name': lakehouseName, 'Lakehouse ID': lakehouseId, 'Description': lakehouseDesc, 'OneLake Tables Path': oneLakeTP, 'OneLake Files Path': oneLakeFP, 'SQL Endpoint Connection String': sqlEPCS, 'SQL Endpoint ID': sqlepid, 'SQL Endpoint Provisioning Status': sqlepstatus} + + for v in response.json()["value"]: + lakehouseId = v["id"] + lakehouseName = v["displayName"] + lakehouseDesc = v["description"] + prop = v["properties"] + oneLakeTP = prop["oneLakeTablesPath"] + oneLakeFP = prop["oneLakeFilesPath"] + sqlEPProp = prop["sqlEndpointProperties"] + sqlEPCS = sqlEPProp["connectionString"] + sqlepid = sqlEPProp["id"] + sqlepstatus = sqlEPProp["provisioningStatus"] + + new_data = { + "Lakehouse Name": lakehouseName, + "Lakehouse ID": lakehouseId, + "Description": lakehouseDesc, + "OneLake Tables Path": oneLakeTP, + "OneLake Files Path": oneLakeFP, + "SQL Endpoint Connection String": sqlEPCS, + "SQL Endpoint ID": sqlepid, + "SQL Endpoint Provisioning Status": sqlepstatus, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_warehouses(workspace: Optional[str] = None): +def list_warehouses(workspace: Optional[str] = None): """ Shows the warehouses within a workspace. @@ -409,14 +590,23 @@ def list_warehouses(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the warehouses within a workspace. 
""" - df = pd.DataFrame(columns=['Warehouse Name', 'Warehouse ID', 'Description', 'Connection Info', 'Created Date', 'Last Updated Time']) + df = pd.DataFrame( + columns=[ + "Warehouse Name", + "Warehouse ID", + "Description", + "Connection Info", + "Created Date", + "Last Updated Time", + ] + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -426,23 +616,30 @@ def list_warehouses(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/warehouses/") - - for v in response.json()['value']: - warehouse_id = v['id'] - warehouse_name = v['displayName'] - desc = v['description'] - prop = v['properties'] - connInfo = prop['connectionInfo'] - createdDate = prop['createdDate'] - lastUpdate = prop['lastUpdatedTime'] - - new_data = {'Warehouse Name': warehouse_name, 'Warehouse ID': warehouse_id, 'Description': desc, 'Connection Info': connInfo, 'Created Date': createdDate, 'Last Updated Time': lastUpdate} + + for v in response.json()["value"]: + warehouse_id = v["id"] + warehouse_name = v["displayName"] + desc = v["description"] + prop = v["properties"] + connInfo = prop["connectionInfo"] + createdDate = prop["createdDate"] + lastUpdate = prop["lastUpdatedTime"] + + new_data = { + "Warehouse Name": warehouse_name, + "Warehouse ID": warehouse_id, + "Description": desc, + "Connection Info": connInfo, + "Created Date": createdDate, + "Last Updated Time": lastUpdate, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_sqlendpoints(workspace: Optional[str] = None): +def list_sqlendpoints(workspace: Optional[str] = None): """ Shows the SQL Endpoints within a workspace. @@ -452,14 +649,14 @@ def list_sqlendpoints(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the SQL Endpoints within a workspace. """ - df = pd.DataFrame(columns=['SQL Endpoint ID', 'SQL Endpoint Name', 'Description']) + df = pd.DataFrame(columns=["SQL Endpoint ID", "SQL Endpoint Name", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -469,19 +666,23 @@ def list_sqlendpoints(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/sqlEndpoints/") - - for v in response.json()['value']: - sql_id = v['id'] - lake_name = v['displayName'] - desc = v['description'] - new_data = {'SQL Endpoint ID': sql_id, 'SQL Endpoint Name': lake_name, 'Description': desc} + for v in response.json()["value"]: + sql_id = v["id"] + lake_name = v["displayName"] + desc = v["description"] + + new_data = { + "SQL Endpoint ID": sql_id, + "SQL Endpoint Name": lake_name, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mirroredwarehouses(workspace: Optional[str] = None): +def list_mirroredwarehouses(workspace: Optional[str] = None): """ Shows the mirrored warehouses within a workspace. @@ -491,14 +692,16 @@ def list_mirroredwarehouses(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the mirrored warehouses within a workspace. 
""" - df = pd.DataFrame(columns=['Mirrored Warehouse', 'Mirrored Warehouse ID', 'Description']) + df = pd.DataFrame( + columns=["Mirrored Warehouse", "Mirrored Warehouse ID", "Description"] + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -508,19 +711,23 @@ def list_mirroredwarehouses(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mirroredWarehouses/") - - for v in response.json()['value']: - mirr_id = v['id'] - dbname = v['displayName'] - desc = v['description'] - new_data = {'Mirrored Warehouse': dbname, 'Mirrored Warehouse ID': mirr_id, 'Description': desc} + for v in response.json()["value"]: + mirr_id = v["id"] + dbname = v["displayName"] + desc = v["description"] + + new_data = { + "Mirrored Warehouse": dbname, + "Mirrored Warehouse ID": mirr_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_kqldatabases(workspace: Optional[str] = None): +def list_kqldatabases(workspace: Optional[str] = None): """ Shows the KQL databases within a workspace. @@ -530,14 +737,24 @@ def list_kqldatabases(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the KQL Databases within a workspace. """ - df = pd.DataFrame(columns=['KQL Database Name', 'KQL Database ID', 'Description', 'Parent Eventhouse Item ID', 'Query Service URI', 'Ingestion Service URI', 'Kusto Database Type']) + df = pd.DataFrame( + columns=[ + "KQL Database Name", + "KQL Database ID", + "Description", + "Parent Eventhouse Item ID", + "Query Service URI", + "Ingestion Service URI", + "Kusto Database Type", + ] + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -547,24 +764,32 @@ def list_kqldatabases(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlDatabases/") - - for v in response.json()['value']: - kql_id = v['id'] - kql_name = v['displayName'] - desc = v['description'] - prop = v['properties'] - eventId = prop['parentEventhouseItemId'] - qsURI = prop['queryServiceUri'] - isURI = prop['ingestionServiceUri'] - dbType = prop['kustoDatabaseType'] - - new_data = {'KQL Database Name': kql_name, 'KQL Database ID': kql_id, 'Description': desc, 'Parent Eventhouse Item ID': eventId, 'Query Service URI': qsURI, 'Ingestion Service URI': isURI, 'Kusto Database Type': dbType} + + for v in response.json()["value"]: + kql_id = v["id"] + kql_name = v["displayName"] + desc = v["description"] + prop = v["properties"] + eventId = prop["parentEventhouseItemId"] + qsURI = prop["queryServiceUri"] + isURI = prop["ingestionServiceUri"] + dbType = prop["kustoDatabaseType"] + + new_data = { + "KQL Database Name": kql_name, + "KQL Database ID": kql_id, + "Description": desc, + "Parent Eventhouse Item ID": eventId, + "Query Service URI": qsURI, + "Ingestion Service URI": isURI, + "Kusto Database Type": dbType, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_kqlquerysets(workspace: Optional[str] = None): +def list_kqlquerysets(workspace: Optional[str] = None): """ Shows the KQL Querysets within a workspace. @@ -574,14 +799,14 @@ def list_kqlquerysets(workspace: Optional[str] = None): The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the KQL Querysets within a workspace. """ - df = pd.DataFrame(columns=['KQL Queryset Name', 'KQL Queryset ID', 'Description']) + df = pd.DataFrame(columns=["KQL Queryset Name", "KQL Queryset ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -591,19 +816,23 @@ def list_kqlquerysets(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlQuerysets/") - - for v in response.json()['value']: - kql_id = v['id'] - kql_name = v['displayName'] - desc = v['description'] - new_data = {'KQL Queryset Name': kql_name, 'KQL Queryset ID': kql_id, 'Description': desc} + for v in response.json()["value"]: + kql_id = v["id"] + kql_name = v["displayName"] + desc = v["description"] + + new_data = { + "KQL Queryset Name": kql_name, + "KQL Queryset ID": kql_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mlmodels(workspace: Optional[str] = None): +def list_mlmodels(workspace: Optional[str] = None): """ Shows the ML models within a workspace. @@ -613,14 +842,14 @@ def list_mlmodels(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the ML models within a workspace. """ - df = pd.DataFrame(columns=['ML Model Name', 'ML Model ID', 'Description']) + df = pd.DataFrame(columns=["ML Model Name", "ML Model ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -630,19 +859,23 @@ def list_mlmodels(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlModels/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'ML Model Name': modelName, 'ML Model ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "ML Model Name": modelName, + "ML Model ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_eventstreams(workspace: Optional[str] = None): +def list_eventstreams(workspace: Optional[str] = None): """ Shows the eventstreams within a workspace. @@ -652,14 +885,14 @@ def list_eventstreams(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the eventstreams within a workspace. 
""" - df = pd.DataFrame(columns=['Eventstream Name', 'Eventstream ID', 'Description']) + df = pd.DataFrame(columns=["Eventstream Name", "Eventstream ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -669,19 +902,23 @@ def list_eventstreams(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/eventstreams/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Eventstream Name': modelName, 'Eventstream ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Eventstream Name": modelName, + "Eventstream ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_datapipelines(workspace: Optional[str] = None): +def list_datapipelines(workspace: Optional[str] = None): """ Shows the data pipelines within a workspace. @@ -691,14 +928,14 @@ def list_datapipelines(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the data pipelines within a workspace. """ - df = pd.DataFrame(columns=['Data Pipeline Name', 'Data Pipeline ID', 'Description']) + df = pd.DataFrame(columns=["Data Pipeline Name", "Data Pipeline ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -708,19 +945,23 @@ def list_datapipelines(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/dataPipelines/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Data Pipeline Name': modelName, 'Data Pipeline ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Data Pipeline Name": modelName, + "Data Pipeline ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mlexperiments(workspace: Optional[str] = None): +def list_mlexperiments(workspace: Optional[str] = None): """ Shows the ML experiments within a workspace. @@ -730,14 +971,14 @@ def list_mlexperiments(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the ML experiments within a workspace. 
""" - df = pd.DataFrame(columns=['ML Experiment Name', 'ML Experiment ID', 'Description']) + df = pd.DataFrame(columns=["ML Experiment Name", "ML Experiment ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -747,19 +988,23 @@ def list_mlexperiments(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlExperiments/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'ML Experiment Name': modelName, 'ML Experiment ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "ML Experiment Name": modelName, + "ML Experiment ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_datamarts(workspace: Optional[str] = None): +def list_datamarts(workspace: Optional[str] = None): """ Shows the datamarts within a workspace. @@ -769,14 +1014,14 @@ def list_datamarts(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the datamarts within a workspace. """ - df = pd.DataFrame(columns=['Datamart Name', 'Datamart ID', 'Description']) + df = pd.DataFrame(columns=["Datamart Name", "Datamart ID", "Description"]) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -786,19 +1031,25 @@ def list_datamarts(workspace: Optional[str] = None): client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/datamarts/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Datamart Name': modelName, 'Datamart ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Datamart Name": modelName, + "Datamart ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def create_warehouse(warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None): +def create_warehouse( + warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None +): """ Creates a Fabric warehouse. @@ -812,48 +1063,57 @@ def create_warehouse(warehouse: str, description: Optional[str] = None, workspac The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- + Returns ------- - + """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace_id = fabric.resolve_workspace_id(workspace) if description == None: - request_body = { - "displayName": warehouse - } + request_body = {"displayName": warehouse} else: - request_body = { - "displayName": warehouse, - "description": description - } + request_body = {"displayName": warehouse, "description": description} client = fabric.FabricRestClient() - response = client.post(f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body) + response = client.post( + f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body + ) if response.status_code == 201: - print(f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace.") + print( + f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + ) elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] + operationId = response.headers["x-ms-operation-id"] response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": time.sleep(3) response = client.get(f"/v1/operations/{operationId}") response_body = json.loads(response.content) response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace.") + print( + f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + ) else: - print(f"ERROR: Failed to create the '{warehouse}' warehouse within the '{workspace}' workspace.") - -def update_item(item_type: str, current_name: str, new_name: str, description: Optional[str] = None, workspace:Optional[str] = None): - + print( + f"ERROR: Failed to create the '{warehouse}' warehouse within the '{workspace}' workspace." + ) + + +def update_item( + item_type: str, + current_name: str, + new_name: str, + description: Optional[str] = None, + workspace: Optional[str] = None, +): """ Updates the name/description of a Fabric item. @@ -871,10 +1131,10 @@ def update_item(item_type: str, current_name: str, new_name: str, description: O The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- - + """ if workspace == None: @@ -884,57 +1144,64 @@ def update_item(item_type: str, current_name: str, new_name: str, description: O workspace_id = fabric.resolve_workspace_id(workspace) itemTypes = { - 'DataPipeline': 'dataPipelines', - 'Eventstream': 'eventstreams', - 'KQLDatabase': 'kqlDatabases', - 'KQLQueryset': 'kqlQuerysets', - 'Lakehouse': 'lakehouses', - 'MLExperiment': 'mlExperiments', - 'MLModel': 'mlModels', - 'Notebook': 'notebooks', - 'Warehouse': 'warehouses', + "DataPipeline": "dataPipelines", + "Eventstream": "eventstreams", + "KQLDatabase": "kqlDatabases", + "KQLQueryset": "kqlQuerysets", + "Lakehouse": "lakehouses", + "MLExperiment": "mlExperiments", + "MLModel": "mlModels", + "Notebook": "notebooks", + "Warehouse": "warehouses", } - item_type = item_type.replace(' ','').capitalize() + item_type = item_type.replace(" ", "").capitalize() if item_type not in itemTypes.keys(): print(f"The '{item_type}' is not a valid item type. 
") return - + itemType = itemTypes[item_type] - dfI = fabric.list_items(workspace = workspace, type = item_type) - dfI_filt = dfI[(dfI['Display Name'] == current_name)] + dfI = fabric.list_items(workspace=workspace, type=item_type) + dfI_filt = dfI[(dfI["Display Name"] == current_name)] if len(dfI_filt) == 0: - print(f"The '{current_name}' {item_type} does not exist within the '{workspace}' workspace.") + print( + f"The '{current_name}' {item_type} does not exist within the '{workspace}' workspace." + ) return - - itemId = dfI_filt['Id'].iloc[0] + + itemId = dfI_filt["Id"].iloc[0] if description == None: - request_body = { - "displayName": new_name - } + request_body = {"displayName": new_name} else: - request_body = { - "displayName": new_name, - "description": description - } + request_body = {"displayName": new_name, "description": description} client = fabric.FabricRestClient() - response = client.patch(f"/v1/workspaces/{workspace_id}/{itemType}/{itemId}", json=request_body) + response = client.patch( + f"/v1/workspaces/{workspace_id}/{itemType}/{itemId}", json=request_body + ) if response.status_code == 200: if description == None: - print(f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}'") + print( + f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}'" + ) else: - print(f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}' and have a description of '{description}'") + print( + f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}' and have a description of '{description}'" + ) else: - print(f"ERROR: The '{current_name}' {item_type} within the '{workspace}' workspace was not updateds.") + print( + f"ERROR: The '{current_name}' {item_type} within the '{workspace}' workspace was not updateds." + ) -def list_relationships(dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False): +def list_relationships( + dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False +): """ Shows a semantic model's relationships and their properties. @@ -948,7 +1215,7 @@ def list_relationships(dataset: str, workspace: Optional[str] = None, extended: or if no lakehouse attached, resolves to the workspace of the notebook. extended : bool, default=False Fetches extended column information. 
- + Returns ------- pandas.DataFrame @@ -958,83 +1225,104 @@ def list_relationships(dataset: str, workspace: Optional[str] = None, extended: if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) + + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) if extended: # Used to map the Relationship IDs rel = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [ID] AS [RelationshipID] ,[Name] FROM $SYSTEM.TMSCHEMA_RELATIONSHIPS - """) + """, + ) # USED_SIZE shows the Relationship Size where TABLE_ID starts with R$ cs = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TABLE_ID] ,[USED_SIZE] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMN_SEGMENTS - """) + """, + ) def parse_value(text): - ind = text.rfind('(') + 1 + ind = text.rfind("(") + 1 output = text[ind:] output = output[:-1] return output - cs['RelationshipID'] = cs['TABLE_ID'].apply(parse_value).astype('uint64') - relcs = pd.merge(cs[['RelationshipID', 'TABLE_ID', 'USED_SIZE']], rel, on='RelationshipID', how='left') + cs["RelationshipID"] = cs["TABLE_ID"].apply(parse_value).astype("uint64") + relcs = pd.merge( + cs[["RelationshipID", "TABLE_ID", "USED_SIZE"]], + rel, + on="RelationshipID", + how="left", + ) - dfR['Used Size'] = None + dfR["Used Size"] = None for i, r in dfR.iterrows(): - relName = r['Relationship Name'] + relName = r["Relationship Name"] + + filtered_cs = relcs[ + (relcs["Name"] == relName) & (relcs["TABLE_ID"].str.startswith("R$")) + ] + sumval = filtered_cs["USED_SIZE"].sum() + dfR.at[i, "Used Size"] = sumval - filtered_cs = relcs[(relcs['Name'] == relName) & (relcs['TABLE_ID'].str.startswith("R$"))] - sumval = filtered_cs['USED_SIZE'].sum() - dfR.at[i, 'Used Size'] = sumval + dfR["Used Size"] = dfR["Used Size"].astype("int") - dfR['Used Size'] = dfR['Used Size'].astype('int') - return dfR -def list_dataflow_storage_accounts(): +def list_dataflow_storage_accounts(): """ Shows the accessible dataflow storage accounts. Parameters ---------- - + Returns ------- pandas.DataFrame A pandas dataframe showing the accessible dataflow storage accounts. 
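With extended=True, list_relationships above joins the TMSCHEMA_RELATIONSHIPS and DISCOVER_STORAGE_TABLE_COLUMN_SEGMENTS DMVs to add a per-relationship 'Used Size' column. A minimal usage sketch; the model name is a placeholder:

from sempy_labs._list_functions import list_relationships

dfR = list_relationships("AdventureWorks", extended=True)
print(dfR[["Relationship Name", "Used Size"]].head())
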
""" - df = pd.DataFrame(columns=['Dataflow Storage Account ID', 'Dataflow Storage Account Name', 'Enabled']) + df = pd.DataFrame( + columns=[ + "Dataflow Storage Account ID", + "Dataflow Storage Account Name", + "Enabled", + ] + ) client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/dataflowStorageAccounts") - - for v in response.json()['value']: - dfsaId = v['id'] - dfsaName = v['name'] - isEnabled = v['isEnabled'] - - new_data = {'Dataflow Storage Account ID': dfsaId, 'Dataflow Storage Account Name': dfsaName, 'Enabled': isEnabled} + + for v in response.json()["value"]: + dfsaId = v["id"] + dfsaName = v["name"] + isEnabled = v["isEnabled"] + + new_data = { + "Dataflow Storage Account ID": dfsaId, + "Dataflow Storage Account Name": dfsaName, + "Enabled": isEnabled, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Enabled'] = df['Enabled'].astype(bool) + df["Enabled"] = df["Enabled"].astype(bool) return df -def list_kpis(dataset: str, workspace: Optional[str] = None): +def list_kpis(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's KPIs and their properties. @@ -1055,20 +1343,50 @@ def list_kpis(dataset: str, workspace: Optional[str] = None): from .TOM import connect_semantic_model - with connect_semantic_model(dataset = dataset, workspace = workspace, readonly = True) as tom: - - df = pd.DataFrame(columns=['Table Name', 'Measure Name', 'Target Expression', 'Target Format String', 'Target Description', 'Status Expression', 'Status Graphic', 'Status Description', 'Trend Expression', 'Trend Graphic', 'Trend Description']) + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + + df = pd.DataFrame( + columns=[ + "Table Name", + "Measure Name", + "Target Expression", + "Target Format String", + "Target Description", + "Status Expression", + "Status Graphic", + "Status Description", + "Trend Expression", + "Trend Graphic", + "Trend Description", + ] + ) for t in tom.model.Tables: for m in t.Measures: if m.KPI is not None: - new_data = {'Table Name': t.Name, 'Measure Name': m.Name, 'Target Expression': m.KPI.TargetExpression, 'Target Format String': m.KPI.TargetFormatString, 'Target Description': m.KPI.TargetDescription, 'Status Graphic': m.KPI.StatusGraphic, 'Status Expression': m.KPI.StatusExpression, 'Status Description': m.KPI.StatusDescription, 'Trend Expression': m.KPI.TrendExpression, 'Trend Graphic': m.KPI.TrendGraphic, 'Trend Description': m.KPI.TrendDescription} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Table Name": t.Name, + "Measure Name": m.Name, + "Target Expression": m.KPI.TargetExpression, + "Target Format String": m.KPI.TargetFormatString, + "Target Description": m.KPI.TargetDescription, + "Status Graphic": m.KPI.StatusGraphic, + "Status Expression": m.KPI.StatusExpression, + "Status Description": m.KPI.StatusDescription, + "Trend Expression": m.KPI.TrendExpression, + "Trend Graphic": m.KPI.TrendGraphic, + "Trend Description": m.KPI.TrendDescription, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df - -def list_workspace_role_assignments(workspace: Optional[str] = None): + +def list_workspace_role_assignments(workspace: Optional[str] = None): """ Shows the members of a given workspace. @@ -1078,7 +1396,7 @@ def list_workspace_role_assignments(workspace: Optional[str] = None): The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame @@ -1091,18 +1409,23 @@ def list_workspace_role_assignments(workspace: Optional[str] = None): else: workspace_id = fabric.resolve_workspace_id(workspace) - df = pd.DataFrame(columns=['User Name', 'User Email', 'Role Name', 'Type']) + df = pd.DataFrame(columns=["User Name", "User Email", "Role Name", "Type"]) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/roleAssignments") - for i in response.json()['value']: - user_name = i['principal']['displayName'] - role_name = i['role'] - user_email = i['principal']['userDetails']['userPrincipalName'] - user_type = i['principal']['type'] - - new_data = {'User Name': user_name, 'Role Name': role_name, 'Type': user_type, 'User Email': user_email} + for i in response.json()["value"]: + user_name = i["principal"]["displayName"] + role_name = i["role"] + user_email = i["principal"]["userDetails"]["userPrincipalName"] + user_type = i["principal"]["type"] + + new_data = { + "User Name": user_name, + "Role Name": role_name, + "Type": user_type, + "User Email": user_email, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/sempy_labs/directlake/__init__.py b/sempy_labs/directlake/__init__.py new file mode 100644 index 00000000..de59562e --- /dev/null +++ b/sempy_labs/directlake/__init__.py @@ -0,0 +1,24 @@ +from sempy_labs.directlake._directlake_schema_compare import ( + direct_lake_schema_compare as direct_lake_schema_compare, +) +from sempy_labs.directlake._directlake_schema_sync import ( + direct_lake_schema_sync as direct_lake_schema_sync, +) +from sempy_labs.directlake._get_directlake_lakehouse import ( + get_directlake_lakehouse as get_directlake_lakehouse, +) +from sempy_labs.directlake._get_directlake_model_calc_tables import ( + list_directlake_model_calc_tables as list_directlake_model_calc_tables, +) +from sempy_labs.directlake._show_unsupported_directlake_objects import ( + show_unsupported_direct_lake_objects as show_unsupported_direct_lake_objects, +) +from sempy_labs.directlake._update_directlake_model_lakehouse_connection import ( + update_direct_lake_model_lakehouse_connection as update_direct_lake_model_lakehouse_connection, +) +from sempy_labs.directlake._update_directlake_partition_entity import ( + update_direct_lake_partition_entity as update_direct_lake_partition_entity, +) +from sempy_labs.directlake._get_shared_expression import ( + get_shared_expression as get_shared_expression, +) diff --git a/sempy_labs/directlake/_directlake_schema_compare.py b/sempy_labs/directlake/_directlake_schema_compare.py new file mode 100644 index 00000000..f5b83aa1 --- /dev/null +++ b/sempy_labs/directlake/_directlake_schema_compare.py @@ -0,0 +1,108 @@ +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import ( + format_dax_object_name, + resolve_lakehouse_name, + get_direct_lake_sql_endpoint, +) +from sempy_labs.lakehouse import get_lakehouse_columns +from sempy_labs._list_functions import list_tables +from typing import Optional + + +def direct_lake_schema_compare( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding 
lakehouse and that the columns in each table exist. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if lakehouse_workspace is None: + lakehouse_workspace = workspace + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] + + if len(dfI_filt) == 0: + print( + f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified." + ) + return + + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print(f"The '{dataset}' semantic model is not in Direct Lake mode.") + return + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + lc = get_lakehouse_columns(lakehouse, lakehouse_workspace) + + dfT.rename(columns={"Type": "Table Type"}, inplace=True) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] + dfC = pd.merge(dfC, dfP[["Table Name", "Query"]], on="Table Name", how="inner") + dfC = pd.merge( + dfC, + dfT[["Name", "Table Type"]], + left_on="Table Name", + right_on="Name", + how="inner", + ) + dfC["Full Column Name"] = format_dax_object_name(dfC["Query"], dfC["Source"]) + dfC_filt = dfC[dfC["Table Type"] == "Table"] + # Schema compare + missingtbls = dfP_filt[~dfP_filt["Query"].isin(lc["Table Name"])] + missingtbls = missingtbls[["Table Name", "Query"]] + missingtbls.rename(columns={"Query": "Source Table"}, inplace=True) + missingcols = dfC_filt[~dfC_filt["Full Column Name"].isin(lc["Full Column Name"])] + missingcols = missingcols[ + ["Table Name", "Column Name", "Type", "Data Type", "Source"] + ] + missingcols.rename(columns={"Source": "Source Column"}, inplace=True) + + if len(missingtbls) == 0: + print( + f"All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + else: + print( + f"The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + display(missingtbls) + if len(missingcols) == 0: + print( + f"All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + else: + print( + f"The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." 
+ ) + display(missingcols) diff --git a/sempy_labs/DirectLakeSchemaSync.py b/sempy_labs/directlake/_directlake_schema_sync.py similarity index 50% rename from sempy_labs/DirectLakeSchemaSync.py rename to sempy_labs/directlake/_directlake_schema_sync.py index 8ea4017e..70c7a1a5 100644 --- a/sempy_labs/DirectLakeSchemaSync.py +++ b/sempy_labs/directlake/_directlake_schema_sync.py @@ -1,14 +1,24 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .GetLakehouseColumns import get_lakehouse_columns -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint -from typing import List, Optional, Union +from .lakehouse.GetLakehouseColumns import get_lakehouse_columns +from sempy_labs._helper_functions import ( + format_dax_object_name, + resolve_lakehouse_name, + get_direct_lake_sql_endpoint, +) +from typing import Optional from sempy._utils._log import log -@log -def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_to_model: Optional[bool] = False, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +@log +def direct_lake_schema_sync( + dataset: str, + workspace: Optional[str] = None, + add_to_model: Optional[bool] = False, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model). @@ -29,17 +39,12 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ sempy.fabric._client._utils._init_analysis_services() import Microsoft.AnalysisServices.Tabular as TOM import System - if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) @@ -53,46 +58,52 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] if len(dfI_filt) == 0: - print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.") + print( + f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified." 
+ ) return - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Source Type'] == 'Entity'] - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfC_filt = dfC[dfC['Table Name'].isin(dfP_filt['Table Name'].values)] - dfC_filt = pd.merge(dfC_filt, dfP_filt[['Table Name', 'Query']], on = 'Table Name', how = 'left') - dfC_filt['Column Object'] = format_dax_object_name(dfC_filt['Query'], dfC_filt['Source']) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Source Type"] == "Entity"] + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfC_filt = dfC[dfC["Table Name"].isin(dfP_filt["Table Name"].values)] + dfC_filt = pd.merge( + dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left" + ) + dfC_filt["Column Object"] = format_dax_object_name( + dfC_filt["Query"], dfC_filt["Source"] + ) lc = get_lakehouse_columns(lakehouse, lakehouse_workspace) - lc_filt = lc[lc['Table Name'].isin(dfP_filt['Query'].values)] + lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)] mapping = { - 'string': 'String', - 'bigint': 'Int64', - 'int': 'Int64', - 'smallint': 'Int64', - 'boolean': 'Boolean', - 'timestamp': 'DateTime', - 'date': 'DateTime', - 'decimal(38,18)': 'Decimal', - 'double': 'Double' + "string": "String", + "bigint": "Int64", + "int": "Int64", + "smallint": "Int64", + "boolean": "Boolean", + "timestamp": "DateTime", + "date": "DateTime", + "decimal(38,18)": "Decimal", + "double": "Double", } tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model for i, r in lc_filt.iterrows(): - lakeTName = r['Table Name'] - lakeCName = r['Column Name'] - fullColName = r['Full Column Name'] - dType = r['Data Type'] - - if fullColName not in dfC_filt['Column Object'].values: - dfL = dfP_filt[dfP_filt['Query'] == lakeTName] - tName = dfL['Table Name'].iloc[0] + lakeTName = r["Table Name"] + lakeCName = r["Column Name"] + fullColName = r["Full Column Name"] + dType = r["Data Type"] + + if fullColName not in dfC_filt["Column Object"].values: + dfL = dfP_filt[dfP_filt["Query"] == lakeTName] + tName = dfL["Table Name"].iloc[0] if add_to_model: col = TOM.DataColumn() col.Name = lakeCName @@ -101,11 +112,17 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t try: col.DataType = System.Enum.Parse(TOM.DataType, dt) except: - print(f"ERROR: '{dType}' data type is not mapped properly to the semantic model data types.") + print( + f"ERROR: '{dType}' data type is not mapped properly to the semantic model data types." + ) return m.Tables[tName].Columns.Add(col) - print(f"The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' data type within the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' data type within the '{dataset}' semantic model within the '{workspace}' workspace." + ) else: - print(f"The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace." 
+ ) m.SaveChanges() diff --git a/sempy_labs/GetDirectLakeLakehouse.py b/sempy_labs/directlake/_get_directlake_lakehouse.py similarity index 69% rename from sempy_labs/GetDirectLakeLakehouse.py rename to sempy_labs/directlake/_get_directlake_lakehouse.py index 8ef8fb46..1fa8f778 100644 --- a/sempy_labs/GetDirectLakeLakehouse.py +++ b/sempy_labs/directlake/_get_directlake_lakehouse.py @@ -1,11 +1,20 @@ import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_lakehouse_id, resolve_lakehouse_name, get_direct_lake_sql_endpoint +from sempy_labs._helper_functions import ( + resolve_lakehouse_id, + resolve_lakehouse_name, + get_direct_lake_sql_endpoint, +) from typing import List, Optional, Union from uuid import UUID -def get_direct_lake_lakehouse(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def get_direct_lake_lakehouse( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Identifies the lakehouse used by a Direct Lake semantic model. @@ -29,7 +38,7 @@ def get_direct_lake_lakehouse(dataset: str, workspace: Optional[str] = None, lak ------- str, UUID The lakehouse name and lakehouse ID. - """ + """ if workspace == None: workspace_id = fabric.get_workspace_id() @@ -39,26 +48,25 @@ def get_direct_lake_lakehouse(dataset: str, workspace: Optional[str] = None, lak if lakehouse_workspace is None: lakehouse_workspace = workspace - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: - print(f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode.") + print( + f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode." 
+ ) else: sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[dfI['Id'] == sqlEndpointId] - lakehouseName = dfI_filt['Display Name'].iloc[0] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[dfI["Id"] == sqlEndpointId] + lakehouseName = dfI_filt["Display Name"].iloc[0] lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace) return lakehouseName, lakehouseId - - - diff --git a/sempy_labs/GetSharedExpression.py b/sempy_labs/directlake/_get_shared_expression.py similarity index 54% rename from sempy_labs/GetSharedExpression.py rename to sempy_labs/directlake/_get_shared_expression.py index 032cf288..6b366273 100644 --- a/sempy_labs/GetSharedExpression.py +++ b/sempy_labs/directlake/_get_shared_expression.py @@ -1,11 +1,12 @@ -import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_lakehouse_name -from .ListFunctions import list_lakehouses -from typing import List, Optional, Union +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._list_functions import list_lakehouses +from typing import Optional -def get_shared_expression(lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def get_shared_expression( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +): """ Dynamically generates the M expression used by a Direct Lake model for a given lakehouse. @@ -34,17 +35,25 @@ def get_shared_expression(lakehouse: Optional[str] = None, workspace: Optional[s lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id) - dfL = list_lakehouses(workspace = workspace) - lakeDetail = dfL[dfL['Lakehouse Name'] == lakehouse] + dfL = list_lakehouses(workspace=workspace) + lakeDetail = dfL[dfL["Lakehouse Name"] == lakehouse] - sqlEPCS = lakeDetail['SQL Endpoint Connection String'].iloc[0] - sqlepid = lakeDetail['SQL Endpoint ID'].iloc[0] - provStatus = lakeDetail['SQL Endpoint Provisioning Status'].iloc[0] + sqlEPCS = lakeDetail["SQL Endpoint Connection String"].iloc[0] + sqlepid = lakeDetail["SQL Endpoint ID"].iloc[0] + provStatus = lakeDetail["SQL Endpoint Provisioning Status"].iloc[0] - if provStatus == 'InProgress': - print(f"The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned.") + if provStatus == "InProgress": + print( + f"The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned." 
+ ) return - - sh = 'let\n\tdatabase = Sql.Database("' + sqlEPCS + '", "' + sqlepid + '")\nin\n\tdatabase' - return sh \ No newline at end of file + sh = ( + 'let\n\tdatabase = Sql.Database("' + + sqlEPCS + + '", "' + + sqlepid + + '")\nin\n\tdatabase' + ) + + return sh diff --git a/sempy_labs/ListDirectLakeModelCalcTables.py b/sempy_labs/directlake/_list_directlake_model_calc_tables.py similarity index 68% rename from sempy_labs/ListDirectLakeModelCalcTables.py rename to sempy_labs/directlake/_list_directlake_model_calc_tables.py index 3eac1966..77a0463e 100644 --- a/sempy_labs/ListDirectLakeModelCalcTables.py +++ b/sempy_labs/directlake/_list_directlake_model_calc_tables.py @@ -1,13 +1,13 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .ListFunctions import list_tables, list_annotations -from typing import List, Optional, Union +from sempy_labs._list_functions import list_tables, list_annotations +from typing import Optional from sempy._utils._log import log + @log def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = None): - """ Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery. @@ -30,23 +30,25 @@ def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - df = pd.DataFrame(columns=['Table Name', 'Source Expression']) + df = pd.DataFrame(columns=["Table Name", "Source Expression"]) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: print(f"The '{dataset}' semantic model is not in Direct Lake mode.") else: dfA = list_annotations(dataset, workspace) dfT = list_tables(dataset, workspace) - dfA_filt = dfA[(dfA['Object Type'] == 'Model') & (dfA['Annotation Name'].isin(dfT['Name']))] + dfA_filt = dfA[ + (dfA["Object Type"] == "Model") & (dfA["Annotation Name"].isin(dfT["Name"])) + ] - for i,r in dfA_filt.iterrows(): - tName = r['Annotation Name'] - se = r['Annotation Value'] + for i, r in dfA_filt.iterrows(): + tName = r["Annotation Name"] + se = r["Annotation Value"] - new_data = {'Table Name': tName, 'Source Expression': se} + new_data = {"Table Name": tName, "Source Expression": se} df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/sempy_labs/directlake/_show_unsupported_directlake_objects.py b/sempy_labs/directlake/_show_unsupported_directlake_objects.py new file mode 100644 index 00000000..6b335751 --- /dev/null +++ b/sempy_labs/directlake/_show_unsupported_directlake_objects.py @@ -0,0 +1,88 @@ +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._list_functions import list_tables +from sempy_labs._helper_functions import format_dax_object_name +from typing import Optional, Tuple + + +def show_unsupported_direct_lake_objects( + dataset: str, workspace: Optional[str] = None +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). + + Parameters + ---------- + dataset : str + Name of the semantic model. 
+ workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame, pandas.DataFrame, pandas.DataFrame + 3 pandas dataframes showing objects in a semantic model which are not supported by Direct Lake. + """ + + pd.options.mode.chained_assignment = None + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + + # Calc tables + dfT_filt = dfT[dfT["Type"] == "Calculated Table"] + dfT_filt.rename(columns={"Name": "Table Name"}, inplace=True) + t = dfT_filt[["Table Name", "Type"]] + + # Calc columns + dfC_filt = dfC[(dfC["Type"] == "Calculated") | (dfC["Data Type"] == "Binary")] + c = dfC_filt[["Table Name", "Column Name", "Type", "Data Type", "Source"]] + + # Relationships + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + merged_from = pd.merge( + dfR, dfC, left_on="From Object", right_on="Column Object", how="left" + ) + merged_to = pd.merge( + dfR, dfC, left_on="To Object", right_on="Column Object", how="left" + ) + + dfR["From Column Data Type"] = merged_from["Data Type"] + dfR["To Column Data Type"] = merged_to["Data Type"] + + dfR_filt = dfR[ + ( + (dfR["From Column Data Type"] == "DateTime") + | (dfR["To Column Data Type"] == "DateTime") + ) + | (dfR["From Column Data Type"] != dfR["To Column Data Type"]) + ] + r = dfR_filt[ + [ + "From Table", + "From Column", + "To Table", + "To Column", + "From Column Data Type", + "To Column Data Type", + ] + ] + + # print('Calculated Tables are not supported...') + # display(t) + # print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations") + # print('Calculated columns are not supported. 
Columns of binary data type are not supported.') + # display(c) + # print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.') + # display(r) + + return t, c, r diff --git a/sempy_labs/UpdateDirectLakeModelLakehouseConnection.py b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py similarity index 51% rename from sempy_labs/UpdateDirectLakeModelLakehouseConnection.py rename to sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py index 4f45c309..0ff42fe6 100644 --- a/sempy_labs/UpdateDirectLakeModelLakehouseConnection.py +++ b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py @@ -1,12 +1,17 @@ import sempy import sempy.fabric as fabric from .GetSharedExpression import get_shared_expression -from .HelperFunctions import resolve_lakehouse_name -from .TOM import connect_semantic_model +from sempy_labs._helper_functions import resolve_lakehouse_name +from ..TOM import connect_semantic_model from typing import List, Optional, Union -def update_direct_lake_model_lakehouse_connection(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def update_direct_lake_model_lakehouse_connection( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse. @@ -29,7 +34,7 @@ def update_direct_lake_model_lakehouse_connection(dataset: str, workspace: Optio Returns ------- - """ + """ if workspace == None: workspace_id = fabric.get_workspace_id() @@ -45,27 +50,33 @@ def update_direct_lake_model_lakehouse_connection(dataset: str, workspace: Optio lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) # Check if lakehouse is valid - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'Lakehouse') - dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="Lakehouse") + dfI_filt = dfI[(dfI["Display Name"] == lakehouse)] if len(dfI_filt) == 0: - print(f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace." + ) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] - if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models.") + print( + f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models." 
+ ) else: - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: - - shEx = get_shared_expression(lakehouse,lakehouse_workspace) + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: + + shEx = get_shared_expression(lakehouse, lakehouse_workspace) try: - tom.model.Expressions['DatabaseQuery'].Expression = shEx - print(f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace.") + tom.model.Expressions["DatabaseQuery"].Expression = shEx + print( + f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace." + ) except: - print(f"ERROR: The expression in the '{dataset}' semantic model was not updated.") - - - - + print( + f"ERROR: The expression in the '{dataset}' semantic model was not updated." + ) diff --git a/sempy_labs/UpdateDirectLakePartitionEntity.py b/sempy_labs/directlake/_update_directlake_partition_entity.py similarity index 58% rename from sempy_labs/UpdateDirectLakePartitionEntity.py rename to sempy_labs/directlake/_update_directlake_partition_entity.py index 35561abc..b1484e93 100644 --- a/sempy_labs/UpdateDirectLakePartitionEntity.py +++ b/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -1,10 +1,14 @@ -import sempy import sempy.fabric as fabric -from .TOM import connect_semantic_model +from sempy_labs.TOM import connect_semantic_model from typing import List, Optional, Union -def update_direct_lake_partition_entity(dataset: str, table_name: Union[str, List[str]], entity_name: Union[str, List[str]], workspace: Optional[str] = None): +def update_direct_lake_partition_entity( + dataset: str, + table_name: Union[str, List[str]], + entity_name: Union[str, List[str]], + workspace: Optional[str] = None, +): """ Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse. @@ -20,11 +24,7 @@ def update_direct_lake_partition_entity(dataset: str, table_name: Union[str, Lis The Fabric workspace name in which the semantic model exists. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ + """ if workspace == None: workspace_id = fabric.get_workspace_id() @@ -37,26 +37,32 @@ def update_direct_lake_partition_entity(dataset: str, table_name: Union[str, Lis table_name = [table_name] if isinstance(entity_name, str): entity_name = [entity_name] - + if len(table_name) != len(entity_name): - print(f"ERROR: The 'table_name' and 'entity_name' arrays must be of equal length.") + print( + f"ERROR: The 'table_name' and 'entity_name' arrays must be of equal length." + ) return - - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: + + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: if not tom.is_direct_lake(): - print(f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode.") + print( + f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode." 
+ ) return for tName in table_name: i = table_name.index(tName) eName = entity_name[i] try: - tom.model.Tables[tName].Partitions[0].EntityName = eName - print(f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") + tom.model.Tables[tName].Partitions[0].EntityName = eName + print( + f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) except: - print(f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated.") - - - - + print( + f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated." + ) diff --git a/sempy_labs/lakehouse/__init__.py b/sempy_labs/lakehouse/__init__.py new file mode 100644 index 00000000..af2664d9 --- /dev/null +++ b/sempy_labs/lakehouse/__init__.py @@ -0,0 +1,10 @@ +from sempy_labs.lakehouse._get_lakehouse_columns import ( + get_lakehouse_columns as get_lakehouse_columns, +) +from sempy_labs.lakehouse._get_lakehouse_tables import ( + get_lakehouse_tables as get_lakehouse_tables, +) +from sempy_labs.lakehouse._lakehouse import ( + lakehouse_attached as lakehouse_attached, + optimize_lakehouse_tables as optimize_lakehouse_tables, +) diff --git a/sempy_labs/GetLakehouseColumns.py b/sempy_labs/lakehouse/_get_lakehouse_columns.py similarity index 58% rename from sempy_labs/GetLakehouseColumns.py rename to sempy_labs/lakehouse/_get_lakehouse_columns.py index 56807281..ebc27b30 100644 --- a/sempy_labs/GetLakehouseColumns.py +++ b/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -1,14 +1,18 @@ -import sempy import sempy.fabric as fabric import pandas as pd from pyspark.sql import SparkSession -from delta import DeltaTable -from .HelperFunctions import resolve_lakehouse_name, format_dax_object_name, resolve_lakehouse_id -from .GetLakehouseTables import get_lakehouse_tables -from typing import List, Optional, Union +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + format_dax_object_name, + resolve_lakehouse_id, +) +from typing import Optional +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables -def get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def get_lakehouse_columns( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +): """ Shows the tables and columns of a lakehouse and their respective properties. @@ -28,7 +32,16 @@ def get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[s Shows the tables/columns within a lakehouse and their properties. 
""" - df = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'Table Name', 'Column Name', 'Full Column Name', 'Data Type']) + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Lakehouse Name", + "Table Name", + "Column Name", + "Full Column Name", + "Data Type", + ] + ) if workspace == None: workspace_id = fabric.get_workspace_id() @@ -44,18 +57,27 @@ def get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[s spark = SparkSession.builder.getOrCreate() - tables = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace, extended = False, count_rows = False) - tables_filt = tables[tables['Format'] == 'delta'] + tables = get_lakehouse_tables( + lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False + ) + tables_filt = tables[tables["Format"] == "delta"] for i, r in tables_filt.iterrows(): - tName = r['Table Name'] - tPath = r['Location'] + tName = r["Table Name"] + tPath = r["Location"] delta_table = DeltaTable.forPath(spark, tPath) sparkdf = delta_table.toDF() for cName, data_type in sparkdf.dtypes: tc = format_dax_object_name(tName, cName) - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Column Name': cName, 'Full Column Name': tc, 'Data Type': data_type} + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Column Name": cName, + "Full Column Name": tc, + "Data Type": data_type, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/sempy_labs/lakehouse/_get_lakehouse_tables.py b/sempy_labs/lakehouse/_get_lakehouse_tables.py new file mode 100644 index 00000000..a38c7d6d --- /dev/null +++ b/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -0,0 +1,248 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from pyspark.sql import SparkSession +import pyarrow.parquet as pq +import datetime +from sempy_labs._helper_functions import resolve_lakehouse_id, resolve_lakehouse_name +from ..Guardrails import get_sku_size, get_directlake_guardrails_for_sku +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from typing import Optional + + +def get_lakehouse_tables( + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, + extended: Optional[bool] = False, + count_rows: Optional[bool] = False, + export: Optional[bool] = False, +): + """ + Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. + + Parameters + ---------- + lakehouse : str, default=None + The Fabric lakehouse. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + extended : bool, default=False + Obtains additional columns relevant to the size of each table. + count_rows : bool, default=False + Obtains a row count for each lakehouse table. + export : bool, default=False + Exports the resulting dataframe to a delta table in the lakehouse. + + Returns + ------- + pandas.DataFrame + Shows the tables/columns within a lakehouse and their properties. 
+ """ + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Lakehouse Name", + "Table Name", + "Format", + "Type", + "Location", + ] + ) + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + + if count_rows: # Setting countrows defaults to extended=True + extended = True + + client = fabric.FabricRestClient() + response = client.get( + f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables" + ) + + for i in response.json()["data"]: + tName = i["name"] + tType = i["type"] + tFormat = i["format"] + tLocation = i["location"] + if extended == False: + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + else: + sku_value = get_sku_size(workspace) + guardrail = get_directlake_guardrails_for_sku(sku_value) + + spark = SparkSession.builder.getOrCreate() + + intColumns = ["Files", "Row Groups", "Table Size"] + if tType == "Managed" and tFormat == "delta": + detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0] + num_files = detail_df.numFiles + size_in_bytes = detail_df.sizeInBytes + + delta_table_path = f"Tables/{tName}" + latest_files = ( + spark.read.format("delta").load(delta_table_path).inputFiles() + ) + file_paths = [f.split("/")[-1] for f in latest_files] + + # Handle FileNotFoundError + num_rowgroups = 0 + for filename in file_paths: + try: + num_rowgroups += pq.ParquetFile( + f"/lakehouse/default/{delta_table_path}/{filename}" + ).num_row_groups + except FileNotFoundError: + continue + + if count_rows: + num_rows = spark.table(tName).count() + intColumns.append("Row Count") + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + "Files": num_files, + "Row Groups": num_rowgroups, + "Row Count": num_rows, + "Table Size": size_in_bytes, + } + else: + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + "Files": num_files, + "Row Groups": num_rowgroups, + "Table Size": size_in_bytes, + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + df[intColumns] = df[intColumns].astype(int) + + df["SKU"] = guardrail["Fabric SKUs"].iloc[0] + df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0] + df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0] + df["Row Count Guardrail"] = ( + guardrail["Rows per table (millions)"].iloc[0] * 1000000 + ) + + df["Parquet File Guardrail Hit"] = ( + df["Files"] > df["Parquet File Guardrail"] + ) + df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"] + + if count_rows: + df["Row Count Guardrail Hit"] = ( + df["Row Count"] > df["Row Count Guardrail"] + ) + + if export: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
+ ) + return + spark = SparkSession.builder.getOrCreate() + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) + lakeTName = "lakehouse_table_details" + lakeT_filt = df[df["Table Name"] == lakeTName] + + query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" + + if len(lakeT_filt) == 0: + runId = 1 + else: + dfSpark = spark.sql(query) + maxRunId = dfSpark.collect()[0][0] + runId = maxRunId + 1 + + export_df = df.copy() + + cols = [ + "Files", + "Row Groups", + "Row Count", + "Table Size", + "SKU", + "Parquet File Guardrail", + "Row Group Guardrail", + "Row Count Guardrail", + "Parquet File Guardrail Hit", + "Row Group Guardrail Hit", + "Row Count Guardrail Hit", + ] + + for c in cols: + if c not in export_df: + if c in [ + "Files", + "Row Groups", + "Row Count", + "Table Size", + "Parquet File Guardrail", + "Row Group Guardrail", + "Row Count Guardrail", + ]: + export_df[c] = 0 + export_df[c] = export_df[c].astype(int) + elif c in ["SKU"]: + export_df[c] = None + export_df[c] = export_df[c].astype(str) + elif c in [ + "Parquet File Guardrail Hit", + "Row Group Guardrail Hit", + "Row Count Guardrail Hit", + ]: + export_df[c] = False + export_df[c] = export_df[c].astype(bool) + + print( + f"Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n" + ) + now = datetime.datetime.now() + export_df["Timestamp"] = now + export_df["RunId"] = runId + + export_df.columns = export_df.columns.str.replace(" ", "_") + spark_df = spark.createDataFrame(export_df) + spark_df.write.mode("append").format("delta").saveAsTable(lakeTName) + print( + f"\u2022 Lakehouse table properties have been saved to the '{lakeTName}' delta table." + ) + + return df diff --git a/sempy_labs/Lakehouse.py b/sempy_labs/lakehouse/_lakehouse.py similarity index 67% rename from sempy_labs/Lakehouse.py rename to sempy_labs/lakehouse/_lakehouse.py index eb65e010..eebc5f3d 100644 --- a/sempy_labs/Lakehouse.py +++ b/sempy_labs/lakehouse/_lakehouse.py @@ -1,35 +1,34 @@ -import sempy import sempy.fabric as fabric from tqdm.auto import tqdm from pyspark.sql import SparkSession -from delta import DeltaTable -from .HelperFunctions import resolve_lakehouse_name +from sempy_labs._helper_functions import resolve_lakehouse_name from typing import List, Optional, Union -def lakehouse_attached() -> bool: +def lakehouse_attached() -> bool: """ Identifies if a lakehouse is attached to the notebook. - Parameters - ---------- - Returns ------- bool Returns True if a lakehouse is attached to the notebook. - """ + """ spark = SparkSession.builder.getOrCreate() - lakeId = spark.conf.get('trident.lakehouse.id') - + lakeId = spark.conf.get("trident.lakehouse.id") + if len(lakeId) > 0: return True else: return False -def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def optimize_lakehouse_tables( + tables: Optional[Union[str, List[str]]] = None, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): """ Runs the [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) function over the specified lakehouse tables. @@ -44,30 +43,26 @@ def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, la The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ - from .GetLakehouseTables import get_lakehouse_tables + from .lakehouse.GetLakehouseTables import get_lakehouse_tables if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - lakeTables = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeTablesDelta = lakeTables[lakeTables['Format'] == 'delta'] + lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"] if isinstance(tables, str): tables = [tables] if tables is not None: - tables_filt = lakeTablesDelta[lakeTablesDelta['Table Name'].isin(tables)] + tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)] else: tables_filt = lakeTablesDelta.copy() @@ -75,12 +70,14 @@ def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, la spark = SparkSession.builder.getOrCreate() - i=1 - for index, r in (bar := tqdm(tables_filt.iterrows())): - tableName = r['Table Name'] - tablePath = r['Location'] + i = 1 + for _, r in (bar := tqdm(tables_filt.iterrows())): + tableName = r["Table Name"] + tablePath = r["Location"] bar.set_description(f"Optimizing the '{tableName}' table...") deltaTable = DeltaTable.forPath(spark, tablePath) deltaTable.optimize().executeCompaction() - print(f"The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})") - i+=1 + print( + f"The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})" + ) + i += 1 diff --git a/sempy_labs/migration/__init__.py b/sempy_labs/migration/__init__.py new file mode 100644 index 00000000..d90bec0b --- /dev/null +++ b/sempy_labs/migration/__init__.py @@ -0,0 +1,16 @@ +from sempy_labs.migration._migrate_calctables_to_lakehouse import ( + migrate_calctables_to_lakehouse as migrate_calctables_to_lakehouse, + migrate_field_parameters as migrate_field_parameters, +) +from sempy_labs.migration._migrate_calctables_to_semantic_model import ( + migrate_calc_tables_to_semantic_model as migrate_calc_tables_to_semantic_model, +) +from sempy_labs.migration._migrate_model_objects_to_semantic_model import ( + migrate_model_objects_to_semantic_model as migrate_model_objects_to_semantic_model, +) +from sempy_labs.migration._migrate_tables_columns_to_semantic_model import ( + migrate_tables_columns_to_semantic_model as migrate_tables_columns_to_semantic_model, +) +from sempy_labs.migration._migration_validation import ( + migration_validation as migration_validation, +) diff --git a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py new file mode 100644 index 00000000..27a0f49f --- /dev/null +++ b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -0,0 +1,433 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import re, datetime, time +from .lakehouse.GetLakehouseTables import get_lakehouse_tables +from .HelperFunctions import ( + resolve_lakehouse_name, + resolve_lakehouse_id, + create_abfss_path, +) +from .TOM import connect_semantic_model +from pyspark.sql import SparkSession +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_calc_tables_to_lakehouse( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: 
Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) + else: + lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace) + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + # dfC['Column Object'] = "'" + dfC['Table Name'] + "'[" + dfC['Column Name'] + "]" + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[(dfP["Source Type"] == "Calculated")] + dfP_filt = dfP_filt[ + ~dfP_filt["Query"].str.contains("NAMEOF") + ] # Remove field parameters + # dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] + lakeTables = get_lakehouse_tables(lakehouse, lakehouse_workspace) + + # Do not execute the function if lakehouse tables already exist with the same name + killFunction = False + for i, r in dfP_filt.iterrows(): + tName = r["Table Name"] + dtName = tName.replace(" ", "_") + + if dtName in lakeTables["Table Name"].values: + print( + f"{icons.red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse}' lakehouse in the '{workspace}' workspace." + ) + killFunction = True + + if killFunction: + return + + spark = SparkSession.builder.getOrCreate() + + if len(dfP_filt) == 0: + print( + f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables." 
+ ) + return + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + success = True + for t in tom.model.Tables: + if tom.is_auto_date_table(table_name=t.Name): + print( + f"{icons.yellow_dot} The '{t.Name}' table is an auto-datetime table and is not supported in the Direct Lake migration process. Please create a proper Date/Calendar table in your lakehoues and use it in your Direct Lake model." + ) + else: + for p in t.Partitions: + if str(p.SourceType) == "Calculated": + query = p.Source.Expression + if "NAMEOF" not in query: # exclude field parameters + daxQuery = "" + if query.lower().startswith("calendar") and any( + str(c.Type) == "Calculated" for c in t.Columns + ): + daxQuery = f"ADDCOLUMNS(\n{query}," + for c in t.Columns: + if str(c.Type) == "Calculated": + expr = c.Expression + expr = expr.replace( + f"'{t.Name}'", "" + ).replace(f"{t.Name}[Date]", "[Date]") + expr = expr.replace( + "[MonthNo]", "MONTH([Date])" + ).replace( + "[QuarterNo]", + "INT((MONTH([Date]) + 2) / 3)", + ) + daxQuery = ( + f'{daxQuery}\n"{c.Name}",{expr},' + ) + daxQuery = ( + "EVALUATE\n" + daxQuery.rstrip(",") + "\n)" + ) + else: + daxQuery = f"EVALUATE\n{query}" + daxQueryTopN = ( + daxQuery.replace( + "EVALUATE\n", "EVALUATE\nTOPN(1," + ) + + ")" + ) + + try: + df = fabric.evaluate_dax( + dataset=dataset, + dax_string=daxQueryTopN, + workspace=workspace, + ) + + for col in df.columns: + pattern = r"\[([^\]]+)\]" + + matches = re.findall(pattern, col) + new_column_name = matches[0].replace( + " ", "" + ) + + df.rename( + columns={col: new_column_name}, + inplace=True, + ) + + try: + dataType = next( + str(c.DataType) + for c in tom.model.Tables[ + t.Name + ].Columns + if str(c.Type) + == "CalculatedTableColumn" + and c.SourceColumn == col + ) + except: + dataType = next( + str(c.DataType) + for c in tom.model.Tables[ + t.Name + ].Columns + if str(c.Type) == "Calculated" + and c.Name == new_column_name + ) + + if dataType == "Int64": + df[new_column_name] = df[ + new_column_name + ].astype(int) + elif dataType in ["Decimal", "Double"]: + df[new_column_name] = df[ + new_column_name + ].astype(float) + elif dataType == "Boolean": + df[new_column_name] = df[ + new_column_name + ].astype(bool) + elif dataType == "DateTime": + df[new_column_name] = pd.to_datetime( + df[new_column_name] + ) + + delta_table_name = t.Name.replace( + " ", "_" + ).lower() + + spark_df = spark.createDataFrame(df) + filePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=delta_table_name, + ) + spark_df.write.mode("overwrite").format( + "delta" + ).save(filePath) + + start_time2 = datetime.datetime.now() + timeout2 = datetime.timedelta(minutes=1) + success2 = False + + while not success2: + try: + with connect_semantic_model( + dataset=new_dataset, + readonly=False, + workspace=new_dataset_workspace, + ) as tom2: + success2 = True + tom2.set_annotation( + object=tom2.model, + name=t.Name, + value=daxQuery, + ) + except Exception as e: + if ( + datetime.datetime.now() + - start_time2 + > timeout2 + ): + break + time.sleep(1) + + print( + f"{icons.green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." 
+ ) + except: + print( + f"{icons.red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse." + ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) + + +@log +def migrate_field_parameters( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Migrates field parameters from one semantic model to another. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + from .HelperFunctions import format_dax_object_name + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[(dfP["Source Type"] == "Calculated")] + dfP_filt = dfP_filt[ + dfP_filt["Query"].str.contains("NAMEOF") + ] # Only field parameters + dfC_CalcColumn = dfC[dfC["Type"] == "Calculated"] + + if len(dfP_filt) == 0: + print( + f"{icons.green_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no field parameters." 
+ ) + return + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, workspace=new_dataset_workspace, readonly=False + ) as tom: + success = True + + for i, r in dfP_filt.iterrows(): + tName = r["Table Name"] + query = r["Query"] + + # For field parameters, remove calc columns from the query + rows = query.strip().split("\n") + filtered_rows = [ + row + for row in rows + if not any( + value in row + for value in dfC_CalcColumn["Column Object"].values + ) + ] + updated_query_string = "\n".join(filtered_rows) + + # Remove extra comma + lines = updated_query_string.strip().split("\n") + lines[-2] = lines[-2].rstrip(",") + expr = "\n".join(lines) + + try: + par = TOM.Partition() + par.Name = tName + + parSource = TOM.CalculatedPartitionSource() + par.Source = parSource + parSource.Expression = expr + + tbl = TOM.Table() + tbl.Name = tName + tbl.Partitions.Add(par) + + columns = ["Value1", "Value2", "Value3"] + + for colName in columns: + col = TOM.CalculatedTableColumn() + col.Name = colName + col.SourceColumn = "[" + colName + "]" + col.DataType = TOM.DataType.String + + tbl.Columns.Add(col) + + tom.model.Tables.Add(tbl) + + ep = TOM.JsonExtendedProperty() + ep.Name = "ParameterMetadata" + ep.Value = '{"version":3,"kind":2}' + + rcd = TOM.RelatedColumnDetails() + gpc = TOM.GroupByColumn() + gpc.GroupingColumn = tom.model.Tables[tName].Columns["Value2"] + rcd.GroupByColumns.Add(gpc) + + # Update column properties + tom.model.Tables[tName].Columns["Value2"].IsHidden = True + tom.model.Tables[tName].Columns["Value3"].IsHidden = True + tom.model.Tables[tName].Columns[ + "Value3" + ].DataType = TOM.DataType.Int64 + tom.model.Tables[tName].Columns["Value1"].SortByColumn = ( + tom.model.Tables[tName].Columns["Value3"] + ) + tom.model.Tables[tName].Columns["Value2"].SortByColumn = ( + tom.model.Tables[tName].Columns["Value3"] + ) + tom.model.Tables[tName].Columns[ + "Value2" + ].ExtendedProperties.Add(ep) + tom.model.Tables[tName].Columns[ + "Value1" + ].RelatedColumnDetails = rcd + + dfC_filt1 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value1]") + ] + col1 = dfC_filt1["Column Name"].iloc[0] + dfC_filt2 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value2]") + ] + col2 = dfC_filt2["Column Name"].iloc[0] + dfC_filt3 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value3]") + ] + col3 = dfC_filt3["Column Name"].iloc[0] + + tom.model.Tables[tName].Columns["Value1"].Name = col1 + tom.model.Tables[tName].Columns["Value2"].Name = col2 + tom.model.Tables[tName].Columns["Value3"].Name = col3 + + print( + f"{icons.green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace}' workspace." + ) + except: + print( + f"{icons.red_dot} The '{tName}' table has not been added as a field parameter." 
+ ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py new file mode 100644 index 00000000..470c3942 --- /dev/null +++ b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py @@ -0,0 +1,153 @@ +import sempy.fabric as fabric +import re, datetime, time +from .lakehouse.GetLakehouseTables import get_lakehouse_tables +from .HelperFunctions import resolve_lakehouse_name +from .TOM import connect_semantic_model +from typing import Optional +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_calc_tables_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
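+
+ Examples
+ --------
+ A minimal usage sketch; the model, workspace and lakehouse names below are
+ illustrative placeholders, not values from this repository:
+
+ >>> migrate_calc_tables_to_semantic_model(
+ ...     dataset="SalesImportModel",
+ ...     new_dataset="SalesDirectLake",
+ ...     workspace="Contoso Workspace",
+ ...     new_dataset_workspace="Contoso Workspace",
+ ...     lakehouse="ContosoLakehouse",
+ ... )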
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + # Get calc tables but not field parameters + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[(dfP["Source Type"] == "Calculated")] + dfP_filt = dfP_filt[~dfP_filt["Query"].str.contains("NAMEOF")] + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + lc = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) + # Get all calc table columns of calc tables not including field parameters + dfC_filt = dfC[ + (dfC["Table Name"].isin(dfP_filt["Table Name"])) + ] # & (dfC['Type'] == 'CalculatedTableColumn')] + # dfA = list_annotations(new_dataset, new_dataset_workspace) + # dfA_filt = dfA[(dfA['Object Type'] == 'Model') & ~ (dfA['Annotation Value'].str.contains('NAMEOF'))] + + if len(dfP_filt) == 0: + print( + f"{icons.green_dot} The '{dataset}' semantic model has no calculated tables." + ) + return + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + for tName in dfC_filt["Table Name"].unique(): + if tName.lower() in lc["Table Name"].values: + + try: + tom.model.Tables[tName] + except: + tom.add_table(name=tName) + tom.add_entity_partition( + table_name=tName, + entity_name=tName.replace(" ", "_").lower(), + ) + + columns_in_table = dfC_filt.loc[ + dfC_filt["Table Name"] == tName, "Column Name" + ].unique() + + for cName in columns_in_table: + scName = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Source", + ].iloc[0] + cDataType = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Data Type", + ].iloc[0] + cType = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Type", + ].iloc[0] + + # av = tom.get_annotation_value(object = tom.model, name = tName) + + # if cType == 'CalculatedTableColumn': + # lakeColumn = scName.replace(' ','_') + # elif cType == 'Calculated': + pattern = r"\[([^]]+)\]" + + matches = re.findall(pattern, scName) + lakeColumn = matches[0].replace(" ", "") + try: + tom.model.Tables[tName].Columns[cName] + except: + tom.add_data_column( + table_name=tName, + column_name=cName, + source_column=lakeColumn, + data_type=cDataType, + ) + print( + f"{icons.green_dot} The '{tName}'[{cName}] column has been added." + ) + + print( + f"\n{icons.green_dot} All viable calculated tables have been added to the model." 
+ ) + + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py b/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py new file mode 100644 index 00000000..72381854 --- /dev/null +++ b/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py @@ -0,0 +1,524 @@ +import sempy +import sempy.fabric as fabric +import re, datetime, time +from ._list_functions import list_tables +from .HelperFunctions import create_relationship_name +from .TOM import connect_semantic_model +from typing import Optional +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_model_objects_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspaceId = fabric.resolve_workspace_id(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + dfRole = fabric.get_roles(dataset=dataset, workspace=workspace) + dfRLS = fabric.get_row_level_security_permissions( + dataset=dataset, workspace=workspace + ) + dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace) + dfP = fabric.list_perspectives(dataset=dataset, workspace=workspace) + dfTranslation = fabric.list_translations(dataset=dataset, workspace=workspace) + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfPar = fabric.list_partitions(dataset=dataset, workspace=workspace) + + dfP_cc = dfPar[(dfPar["Source Type"] == "Calculated")] + dfP_fp = dfP_cc[dfP_cc["Query"].str.contains("NAMEOF")] + dfC_fp = dfC[dfC["Table Name"].isin(dfP_fp["Table Name"].values)] + + print(f"{icons.in_progress} Updating '{new_dataset}' based on '{dataset}'...") + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + + isDirectLake = any( + str(p.Mode) == "DirectLake" + for t in tom.model.Tables + for p in t.Partitions + 
) + + print(f"\n{icons.in_progress} Updating table properties...") + for t in tom.model.Tables: + t.IsHidden = bool(dfT.loc[dfT["Name"] == t.Name, "Hidden"].iloc[0]) + t.Description = dfT.loc[dfT["Name"] == t.Name, "Description"].iloc[ + 0 + ] + t.DataCategory = dfT.loc[ + dfT["Name"] == t.Name, "Data Category" + ].iloc[0] + + print( + f"{icons.green_dot} The '{t.Name}' table's properties have been updated." + ) + + print(f"\n{icons.in_progress} Updating column properties...") + for t in tom.model.Tables: + if ( + t.Name not in dfP_fp["Table Name"].values + ): # do not include field parameters + dfT_filtered = dfT[dfT["Name"] == t.Name] + tType = dfT_filtered["Type"].iloc[0] + for c in t.Columns: + if not c.Name.startswith("RowNumber-"): + dfC_filt = dfC[ + (dfC["Table Name"] == t.Name) + & (dfC["Column Name"] == c.Name) + ] + cName = dfC_filt["Column Name"].iloc[0] + c.Name = cName + if tType == "Table": + c.SourceColumn = cName.replace(" ", "_") + c.IsHidden = bool(dfC_filt["Hidden"].iloc[0]) + c.DataType = System.Enum.Parse( + TOM.DataType, dfC_filt["Data Type"].iloc[0] + ) + c.DisplayFolder = dfC_filt["Display Folder"].iloc[0] + c.FormatString = dfC_filt["Format String"].iloc[0] + c.SummarizeBy = System.Enum.Parse( + TOM.AggregateFunction, + dfC_filt["Summarize By"].iloc[0], + ) + c.DataCategory = dfC_filt["Data Category"].iloc[0] + c.IsKey = bool(dfC_filt["Key"].iloc[0]) + sbc = dfC_filt["Sort By Column"].iloc[0] + + if sbc != None: + try: + c.SortByColumn = tom.model.Tables[ + t.Name + ].Columns[sbc] + except: + print( + f"{icons.red_dot} Failed to create '{sbc}' as a Sort By Column for the '{c.Name}' in the '{t.Name}' table." + ) + print( + f"{icons.green_dot} The '{t.Name}'[{c.Name}] column's properties have been updated." + ) + + print(f"\n{icons.in_progress} Creating hierarchies...") + dfH_grouped = ( + dfH.groupby( + [ + "Table Name", + "Hierarchy Name", + "Hierarchy Hidden", + "Hierarchy Description", + ] + ) + .agg({"Level Name": list, "Column Name": list}) + .reset_index() + ) + + for i, r in dfH_grouped.iterrows(): + tName = r["Table Name"] + hName = r["Hierarchy Name"] + hDesc = r["Hierarchy Description"] + hHid = bool(r["Hierarchy Hidden"]) + cols = r["Column Name"] + lvls = r["Level Name"] + + try: + tom.model.Tables[tName].Hierarchies[hName] + except: + tom.add_hierarchy( + table_name=tName, + hierarchy_name=hName, + hierarchy_description=hDesc, + hierarchy_hidden=hHid, + columns=cols, + levels=lvls, + ) + print( + f"{icons.green_dot} The '{hName}' hierarchy has been added." + ) + + print(f"\n{icons.in_progress} Creating measures...") + for i, r in dfM.iterrows(): + tName = r["Table Name"] + mName = r["Measure Name"] + mExpr = r["Measure Expression"] + mHidden = bool(r["Measure Hidden"]) + mDF = r["Measure Display Folder"] + mDesc = r["Measure Description"] + mFS = r["Format String"] + + try: + tom.model.Tables[tName].Measures[mName] + except: + tom.add_measure( + table_name=tName, + measure_name=mName, + expression=mExpr, + hidden=mHidden, + display_folder=mDF, + description=mDesc, + format_string=mFS, + ) + print( + f"{icons.green_dot} The '{mName}' measure has been added." 
+ ) + + for cgName in dfCI["Calculation Group Name"].unique(): + + isHidden = bool( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Hidden" + ].iloc[0] + ) + prec = int( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Precedence" + ].iloc[0] + ) + desc = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Description" + ].iloc[0] + + try: + tom.model.Tables[cgName] + except: + tom.add_calculation_group( + name=cgName, + description=desc, + precedence=prec, + hidden=isHidden, + ) + print( + f"{icons.green_dot} The '{cgName}' calculation group has been added." + ) + tom.model.DiscourageImplicitMeasures = True + + print( + f"\n{icons.in_progress} Updating calculation group column name..." + ) + dfC_filt = dfC[ + (dfC["Table Name"] == cgName) & (dfC["Hidden"] == False) + ] + colName = dfC_filt["Column Name"].iloc[0] + tom.model.Tables[cgName].Columns["Name"].Name = colName + + calcItems = dfCI.loc[ + dfCI["Calculation Group Name"] == cgName, + "Calculation Item Name", + ].unique() + + print(f"\n{icons.in_progress} Creating calculation items...") + for calcItem in calcItems: + ordinal = int( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Ordinal", + ].iloc[0] + ) + expr = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Expression", + ].iloc[0] + fse = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Format String Expression", + ].iloc[0] + try: + tom.model.Tables[cgName].CalculationGroup.CalculationItems[ + calcItem + ] + except: + tom.add_calculation_item( + table_name=cgName, + calculation_item_name=calcItem, + expression=expr, + format_string_expression=fse, + ordinal=ordinal, + ) + print( + f"{icons.green_dot} The '{calcItem}' has been added to the '{cgName}' calculation group." + ) + + print(f"\n{icons.in_progress} Creating relationships...") + for index, row in dfR.iterrows(): + fromTable = row["From Table"] + fromColumn = row["From Column"] + toTable = row["To Table"] + toColumn = row["To Column"] + isActive = row["Active"] + cfb = row["Cross Filtering Behavior"] + sfb = row["Security Filtering Behavior"] + rori = row["Rely On Referential Integrity"] + mult = row["Multiplicity"] + + card_mapping = {"m": "Many", "1": "One", "0": "None"} + + fromCard = card_mapping.get(mult[0]) + toCard = card_mapping.get(mult[-1]) + + relName = create_relationship_name( + fromTable, fromColumn, toTable, toColumn + ) + + if any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} already exists as a relationship in the semantic model." + ) + elif isDirectLake and any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + and ( + r.FromColumn.DataType == "DateTime" + or r.ToColumn.DataType == "DateTime" + ) + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} was not created since relationships based on DateTime columns are not supported." 
+ ) + elif isDirectLake and any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + and (r.FromColumn.DataType != r.ToColumn.DataType) + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} was not created since columns used in a relationship must have the same data type." + ) + else: + try: + tom.add_relationship( + from_table=fromTable, + from_column=fromColumn, + to_table=toTable, + to_column=toColumn, + from_cardinality=fromCard, + to_cardinality=toCard, + cross_filtering_behavior=cfb, + security_filtering_behavior=sfb, + rely_on_referential_integrity=rori, + is_active=isActive, + ) + + print( + f"{icons.green_dot} The {relName} relationship has been added." + ) + except: + print( + f"{icons.red_dot} The {relName} relationship was not added." + ) + + print(f"\n{icons.in_progress} Creating roles...") + for index, row in dfRole.iterrows(): + roleName = row["Role"] + roleDesc = row["Description"] + modPerm = row["Model Permission"] + + try: + tom.model.Roles[roleName] + except: + tom.add_role( + role_name=roleName, + model_permission=modPerm, + description=roleDesc, + ) + print( + f"{icons.green_dot} The '{roleName}' role has been added." + ) + + print(f"\n{icons.in_progress} Creating row level security...") + for index, row in dfRLS.iterrows(): + roleName = row["Role"] + tName = row["Table"] + expr = row["Filter Expression"] + + try: + tom.set_rls( + role_name=roleName, table_name=tName, filter_expression=expr + ) + print( + f"{icons.green_dot} Row level security for the '{tName}' table within the '{roleName}' role has been set." + ) + except: + print( + f"{icons.red_dot} Row level security for the '{tName}' table within the '{roleName}' role was not set." + ) + + print(f"\n{icons.in_progress} Creating perspectives...") + for pName in dfP["Perspective Name"].unique(): + + try: + tom.model.Perspectives[pName] + except: + tom.add_perspective(perspective_name=pName) + print( + f"{icons.green_dot} The '{pName}' perspective has been added." + ) + + print(f"\n{icons.in_progress} Adding objects to perspectives...") + for index, row in dfP.iterrows(): + pName = row["Perspective Name"] + tName = row["Table Name"] + oName = row["Object Name"] + oType = row["Object Type"] + tType = dfT.loc[(dfT["Name"] == tName), "Type"].iloc[0] + + try: + if oType == "Table": + tom.add_to_perspective( + object=tom.model.Tables[tName], perspective_name=pName + ) + elif oType == "Column": + tom.add_to_perspective( + object=tom.model.Tables[tName].Columns[oName], + perspective_name=pName, + ) + elif oType == "Measure": + tom.add_to_perspective( + object=tom.model.Tables[tName].Measures[oName], + perspective_name=pName, + ) + elif oType == "Hierarchy": + tom.add_to_perspective( + object=tom.model.Tables[tName].Hierarchies[oName], + perspective_name=pName, + ) + except: + pass + + print(f"\n{icons.in_progress} Creating translation languages...") + for trName in dfTranslation["Culture Name"].unique(): + try: + tom.model.Cultures[trName] + except: + tom.add_translation(trName) + print( + f"{icons.green_dot} The '{trName}' translation language has been added." 
+ ) + + print(f"\n{icons.in_progress} Creating translation values...") + for index, row in dfTranslation.iterrows(): + trName = row["Culture Name"] + tName = row["Table Name"] + oName = row["Object Name"] + oType = row["Object Type"] + translation = row["Translation"] + prop = row["Property"] + + if prop == "Caption": + prop = "Name" + elif prop == "DisplayFolder": + prop = "Display Folder" + + try: + if oType == "Table": + tom.set_translation( + object=tom.model.Tables[tName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Column": + tom.set_translation( + object=tom.model.Tables[tName].Columns[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Measure": + tom.set_translation( + object=tom.model.Tables[tName].Measures[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Hierarchy": + tom.set_translation( + object=tom.model.Tables[tName].Hierarchies[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Level": + + pattern = r"\[([^]]+)\]" + matches = re.findall(pattern, oName) + lName = matches[0] + + pattern = r"'([^']+)'" + matches = re.findall(pattern, oName) + hName = matches[0] + tom.set_translation( + object=tom.model.Tables[tName] + .Hierarchies[hName] + .Levels[lName], + language=trName, + property=prop, + value=translation, + ) + except: + pass + + print( + f"\n{icons.green_dot} Migration of objects from '{dataset}' -> '{new_dataset}' is complete." + ) + + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py new file mode 100644 index 00000000..6461f107 --- /dev/null +++ b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py @@ -0,0 +1,169 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import datetime, time +from ._list_functions import list_tables +from .GetSharedExpression import get_shared_expression +from .HelperFunctions import resolve_lakehouse_name +from .lakehouse.Lakehouse import lakehouse_attached +from .TOM import connect_semantic_model +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_tables_columns_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. 
+ Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + # Check that lakehouse is attached to the notebook + lakeAttach = lakehouse_attached() + + # Run if lakehouse is attached to the notebook or a lakehouse & lakehouse workspace are specified + if lakeAttach or (lakehouse is not None and lakehouse_workspace is not None): + shEx = get_shared_expression(lakehouse, lakehouse_workspace) + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfT = list_tables(dataset, workspace) + dfT.rename(columns={"Type": "Table Type"}, inplace=True) + dfC = pd.merge( + dfC, + dfT[["Name", "Table Type"]], + left_on="Table Name", + right_on="Name", + how="left", + ) + dfT_filt = dfT[dfT["Table Type"] == "Table"] + dfC_filt = dfC[ + (dfC["Table Type"] == "Table") + & ~(dfC["Column Name"].str.startswith("RowNumber-")) + & (dfC["Type"] != "Calculated") + ] + + print(f"{icons.in_progress} Updating '{new_dataset}' based on '{dataset}'...") + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + try: + tom.model.Expressions["DatabaseQuery"] + except: + tom.add_expression("DatabaseQuery", expression=shEx) + print( + f"{icons.green_dot} The 'DatabaseQuery' expression has been added." + ) + + for i, r in dfT_filt.iterrows(): + tName = r["Name"] + tDC = r["Data Category"] + tHid = bool(r["Hidden"]) + tDesc = r["Description"] + + try: + tom.model.Tables[tName] + except: + tom.add_table( + name=tName, + description=tDesc, + data_category=tDC, + hidden=tHid, + ) + tom.add_entity_partition( + table_name=tName, entity_name=tName.replace(" ", "_") + ) + print( + f"{icons.green_dot} The '{tName}' table has been added." + ) + + for i, r in dfC_filt.iterrows(): + tName = r["Table Name"] + cName = r["Column Name"] + scName = r["Source"].replace(" ", "_") + cHid = bool(r["Hidden"]) + cDataType = r["Data Type"] + + try: + tom.model.Tables[tName].Columns[cName] + except: + tom.add_data_column( + table_name=tName, + column_name=cName, + source_column=scName, + hidden=cHid, + data_type=cDataType, + ) + print( + f"{icons.green_dot} The '{tName}'[{cName}] column has been added." + ) + + print( + f"\n{icons.green_dot} All regular tables and columns have been added to the '{new_dataset}' semantic model." + ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) + else: + print( + f"{icons.red_dot} Lakehouse not attached to notebook and lakehouse/lakehouse_workspace are not specified. Please add your lakehouse to this notebook or specify the lakehouse/lakehouse_workspace parameters." 
+ ) + print( + f"To attach a lakehouse to a notebook, go to the the 'Explorer' window to the left, click 'Lakehouses' to add your lakehouse to this notebook" + ) + print( + f"\nLearn more here: https://learn.microsoft.com/fabric/data-engineering/lakehouse-notebook-explore#add-or-remove-a-lakehouse" + ) diff --git a/sempy_labs/migration/_migration_validation.py b/sempy_labs/migration/_migration_validation.py new file mode 100644 index 00000000..4e0c9c16 --- /dev/null +++ b/sempy_labs/migration/_migration_validation.py @@ -0,0 +1,230 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from .HelperFunctions import create_relationship_name +from .TOM import connect_semantic_model +from typing import List, Optional, Union +from sempy._utils._log import log + + +def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None): + """ + Shows a list of semantic model objects. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of objects in the semantic model + """ + + if workspace is None: + workspace = fabric.resolve_workspace_name() + + df = pd.DataFrame(columns=["Parent Name", "Object Name", "Object Type"]) + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + for t in tom.model.Tables: + if t.CalculationGroup is not None: + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": "Calculation Group", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for ci in t.CalculationGroup.CalculationItems: + new_data = { + "Parent Name": t.Name, + "Object Name": ci.Name, + "Object Type": str(ci.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + elif any(str(p.SourceType) == "Calculated" for p in t.Partitions): + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": "Calculated Table", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": str(t.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for c in t.Columns: + if str(c.Type) != "RowNumber": + if str(c.Type) == "Calculated": + new_data = { + "Parent Name": c.Parent.Name, + "Object Name": c.Name, + "Object Type": "Calculated Column", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + new_data = { + "Parent Name": c.Parent.Name, + "Object Name": c.Name, + "Object Type": str(c.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for m in t.Measures: + new_data = { + "Parent Name": m.Parent.Name, + "Object Name": m.Name, + "Object Type": str(m.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for h in t.Hierarchies: + new_data = { + "Parent Name": h.Parent.Name, + "Object Name": h.Name, + "Object Type": str(h.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for l in h.Levels: + new_data = { + "Parent Name": l.Parent.Name, + "Object Name": l.Name, 
+ "Object Type": str(l.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for p in t.Partitions: + new_data = { + "Parent Name": p.Parent.Name, + "Object Name": p.Name, + "Object Type": str(p.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for r in tom.model.Relationships: + rName = create_relationship_name( + r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name + ) + new_data = { + "Parent Name": r.Parent.Name, + "Object Name": rName, + "Object Type": str(r.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for role in tom.model.Roles: + new_data = { + "Parent Name": role.Parent.Name, + "Object Name": role.Name, + "Object Type": str(role.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for rls in role.TablePermissions: + new_data = { + "Parent Name": role.Name, + "Object Name": rls.Name, + "Object Type": str(rls.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for tr in tom.model.Cultures: + new_data = { + "Parent Name": tr.Parent.Name, + "Object Name": tr.Name, + "Object Type": str(tr.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for per in tom.model.Perspectives: + new_data = { + "Parent Name": per.Parent.Name, + "Object Name": per.Name, + "Object Type": str(per.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +@log +def migration_validation( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Shows the objects in the original semantic model and whether then were migrated successfully or not. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. 
+ """ + + dfA = list_semantic_model_objects(dataset=dataset, workspace=workspace) + dfB = list_semantic_model_objects( + dataset=new_dataset, workspace=new_dataset_workspace + ) + + def is_migrated(row): + if row["Object Type"] == "Calculated Table": + return ( + (dfB["Parent Name"] == row["Parent Name"]) + & (dfB["Object Name"] == row["Object Name"]) + & (dfB["Object Type"].isin(["Calculated Table", "Table"])) + ).any() + else: + return ( + (dfB["Parent Name"] == row["Parent Name"]) + & (dfB["Object Name"] == row["Object Name"]) + & (dfB["Object Type"] == row["Object Type"]) + ).any() + + dfA["Migrated"] = dfA.apply(is_migrated, axis=1) + + denom = len(dfA) + num = len(dfA[dfA["Migrated"]]) + print(f"{100 * round(num / denom,2)}% migrated") + + return dfA diff --git a/sempy_labs/report/__init__.py b/sempy_labs/report/__init__.py new file mode 100644 index 00000000..f908ea90 --- /dev/null +++ b/sempy_labs/report/__init__.py @@ -0,0 +1,15 @@ +from sempy_labs.report._generate_report import ( + create_report_from_reportjson as create_report_from_reportjson, + update_report_from_reportjson as update_report_from_reportjson, +) +from sempy_labs.report._report_functions import ( + get_report_json as get_report_json, + report_dependency_tree as report_dependency_tree, + export_report as export_report, + clone_report as clone_report, + launch_report as launch_report, + list_report_pages as list_report_pages, + list_report_visuals as list_report_visuals, + list_report_bookmarks as list_report_bookmarks, + translate_report_titles as translate_report_titles, +) diff --git a/sempy_labs/report/_generate_report.py b/sempy_labs/report/_generate_report.py new file mode 100644 index 00000000..94139157 --- /dev/null +++ b/sempy_labs/report/_generate_report.py @@ -0,0 +1,260 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import json, base64, time +from typing import List, Optional, Union + + +def create_report_from_reportjson( + report: str, + dataset: str, + report_json: str, + theme_json: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Creates a report based on a report.json file (and an optional themes.json file). + + Parameters + ---------- + report : str + Name of the report. + dataset : str + Name of the semantic model to connect to the report. + report_json : str + The report.json file to be used to create the report. + theme_json : str, default=None + The theme.json file to be used for the theme of the report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + objectType = "Report" + + dfI_m = fabric.list_items(workspace=workspace, type="SemanticModel") + dfI_model = dfI_m[(dfI_m["Display Name"] == dataset)] + + if len(dfI_model) == 0: + print( + f"ERROR: The '{dataset}' semantic model does not exist in the '{workspace}' workspace." + ) + return + + datasetId = dfI_model["Id"].iloc[0] + + dfI_r = fabric.list_items(workspace=workspace, type="Report") + dfI_rpt = dfI_r[(dfI_r["Display Name"] == report)] + + if len(dfI_rpt) > 0: + print( + f"WARNING: '{report}' already exists as a report in the '{workspace}' workspace." 
+ ) + return + + client = fabric.FabricRestClient() + defPBIR = { + "version": "1.0", + "datasetReference": { + "byPath": None, + "byConnection": { + "connectionString": None, + "pbiServiceModelId": None, + "pbiModelVirtualServerName": "sobe_wowvirtualserver", + "pbiModelDatabaseName": datasetId, + "name": "EntityDataSource", + "connectionType": "pbiServiceXmlaStyleLive", + }, + }, + } + + def conv_b64(file): + + loadJson = json.dumps(file) + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + + return f + + definitionPBIR = conv_b64(defPBIR) + payloadReportJson = conv_b64(report_json) + + if theme_json == None: + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": definitionPBIR, + "payloadType": "InlineBase64", + }, + ] + }, + } + else: + payloadThemeJson = conv_b64(theme_json) + themeID = theme_json["payload"]["blob"]["displayName"] + themePath = "StaticResources/SharedResources/BaseThemes/" + themeID + ".json" + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": themePath, + "payload": payloadThemeJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": definitionPBIR, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post(f"/v1/workspaces/{workspace_id}/items", json=request_body) + + if response.status_code == 201: + print("Report creation succeeded") + print(response.json()) + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + print("Report creation succeeded") + print(response.json()) + + +def update_report_from_reportjson( + report: str, report_json: str, workspace: Optional[str] = None +): + """ + Updates a report based on a report.json file. + + Parameters + ---------- + report : str + Name of the report. + report_json : str + The report.json file to be used to update the report. + workspace : str, default=None + The Fabric workspace name in which the report resides. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
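+
+ Examples
+ --------
+ A minimal usage sketch; the report name, workspace name and report.json
+ content are illustrative placeholders. Here the report definition is first
+ retrieved with get_report_json, modified as needed, and then pushed back:
+
+ >>> new_report_json = get_report_json(report="Sales Overview", workspace="Contoso Workspace")
+ >>> update_report_from_reportjson(
+ ...     report="Sales Overview",
+ ...     report_json=new_report_json,
+ ...     workspace="Contoso Workspace",
+ ... )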
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + objectType = "Report" + + dfR = fabric.list_reports(workspace=workspace) + dfR_filt = dfR[(dfR["Name"] == report) & (dfR["Report Type"] == "PowerBIReport")] + + if len(dfR_filt) == 0: + print(f"The '{report}' report does not exist in the '{workspace}' workspace.") + return + + reportId = dfR_filt["Id"].iloc[0] + client = fabric.FabricRestClient() + + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{reportId}/getDefinition" + ) + df_items = pd.json_normalize(response.json()["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "definition.pbir"] + rptDefFile = df_items_filt["payload"].iloc[0] + # datasetId = dfR_filt['Dataset Id'].iloc[0] + # datasetWorkspaceId = dfR_filt['Dataset Workspace Id'].iloc[0] + + # defPBIR = { + # "version": "1.0", + # "datasetReference": { + # "byPath": None, + # "byConnection": { + # "connectionString": None, + # "pbiServiceModelId": None, + # "pbiModelVirtualServerName": "sobe_wowvirtualserver", + # "pbiModelDatabaseName": datasetId, + # "name": "EntityDataSource", + # "connectionType": "pbiServiceXmlaStyleLive" + # } + # } + # } + + def conv_b64(file): + + loadJson = json.dumps(file) + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + + return f + + # definitionPBIR = conv_b64(defPBIR) + payloadReportJson = conv_b64(report_json) + + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": rptDefFile, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post( + f"/v1/workspaces/{workspace_id}/reports/{reportId}/updateDefinition", + json=request_body, + ) + + if response.status_code == 201: + print(f"The '{report}' report has been successfully updated.") + # print(response.json()) + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + print(f"The '{report}' report has been successfully updated.") + # print(response.json()) diff --git a/sempy_labs/report/_report_functions.py b/sempy_labs/report/_report_functions.py new file mode 100644 index 00000000..a293c557 --- /dev/null +++ b/sempy_labs/report/_report_functions.py @@ -0,0 +1,869 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import json, os, time, base64, copy, re +from anytree import Node, RenderTree +from powerbiclient import Report +from synapse.ml.services import Translate +from pyspark.sql.functions import col, flatten +from pyspark.sql import SparkSession +from .report._generate_report import update_report_from_reportjson +from .Translations import language_validate +from .lakehouse.Lakehouse import lakehouse_attached +from .HelperFunctions import ( + generate_embedded_filter, + resolve_dataset_name, + resolve_report_id, + resolve_lakehouse_name, +) +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +def 
get_report_json( + report: str, + workspace: Optional[str] = None, + save_to_file_name: Optional[str] = None, +): + """ + Gets the report.json file content of a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + save_to_file_name : str, default=None + Specifying this parameter will save the report.json file to the lakehouse attached to the notebook with the file name of this parameter. + + Returns + ------- + str + The report.json file for a given Power BI report. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + client = fabric.FabricRestClient() + + dfI = fabric.list_items(workspace=workspace, type="Report") + dfI_filt = dfI[(dfI["Display Name"] == report)] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist in the '{workspace}' workspace." + ) + return + + itemId = dfI_filt["Id"].iloc[0] + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" + ) + df_items = pd.json_normalize(response.json()["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "report.json"] + payload = df_items_filt["payload"].iloc[0] + + reportFile = base64.b64decode(payload).decode("utf-8") + reportJson = json.loads(reportFile) + + if save_to_file_name is not None: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + folderPath = "/lakehouse/default/Files" + fileExt = ".json" + if not save_to_file_name.endswith(fileExt): + save_to_file_name = save_to_file_name + fileExt + filePath = os.path.join(folderPath, save_to_file_name) + with open(filePath, "w") as json_file: + json.dump(reportJson, json_file, indent=4) + print( + f"{icons.green_dot} The report.json file for the '{report}' report has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + ) + + return reportJson + + +def report_dependency_tree(workspace: Optional[str] = None): + """ + Prints a dependency between reports and semantic models. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
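+
+ Examples
+ --------
+ A minimal usage sketch; the workspace name is an illustrative placeholder:
+
+ >>> report_dependency_tree(workspace="Contoso Workspace")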
+ """ + + if workspace == None: + workspaceId = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspaceId) + + dfR = fabric.list_reports(workspace=workspace) + dfD = fabric.list_datasets(workspace=workspace) + dfR = pd.merge( + dfR, + dfD[["Dataset ID", "Dataset Name"]], + left_on="Dataset Id", + right_on="Dataset ID", + how="left", + ) + dfR.rename(columns={"Name": "Report Name"}, inplace=True) + dfR = dfR[["Report Name", "Dataset Name"]] + + report_icon = "\U0001F4F6" + dataset_icon = "\U0001F9CA" + workspace_icon = "\U0001F465" + + node_dict = {} + rootNode = Node(workspace) + node_dict[workspace] = rootNode + rootNode.custom_property = workspace_icon + " " + + for i, r in dfR.iterrows(): + datasetName = r["Dataset Name"] + reportName = r["Report Name"] + parentNode = node_dict.get(datasetName) + if parentNode is None: + parentNode = Node(datasetName, parent=rootNode) + node_dict[datasetName] = parentNode + parentNode.custom_property = dataset_icon + " " + + child_node = Node(reportName, parent=parentNode) + child_node.custom_property = report_icon + " " + + # Print the tree structure + for pre, _, node in RenderTree(node_dict[workspace]): + print(f"{pre}{node.custom_property}'{node.name}'") + + +@log +def export_report( + report: str, + export_format: str, + file_name: Optional[str] = None, + bookmark_name: Optional[str] = None, + page_name: Optional[str] = None, + visual_name: Optional[str] = None, + report_filter: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Exports a Power BI report to a file in your lakehouse. + + Parameters + ---------- + report : str + Name of the Power BI report. + export_format : str + The format in which to export the report. See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'. + file_name : str, default=None + The name of the file to be saved within the lakehouse. Do not include the file extension. Defaults ot the reportName parameter value. + bookmark_name : str, default=None + The name (GUID) of a bookmark within the report. + page_name : str, default=None + The name (GUID) of the report page. + visual_name : str, default=None + The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter. + report_filter : str, default=None + A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + # https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group + + lakeAttach = lakehouse_attached() + + if lakeAttach == False: + print( + f"{icons.red_dot} In order to run the 'export_report' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
+ ) + return + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + if isinstance(page_name, str): + page_name = [page_name] + if isinstance(visual_name, str): + visual_name = [visual_name] + + if bookmark_name is not None and (page_name is not None or visual_name is not None): + print( + f"{icons.red_dot} If the 'bookmark_name' parameter is set, the 'page_name' and 'visual_name' parameters must not be set." + ) + return + if visual_name is not None and page_name is None: + print( + f"{icons.red_dot} If the 'visual_name' parameter is set, the 'page_name' parameter must be set." + ) + return + + validFormats = { + "ACCESSIBLEPDF": ".pdf", + "CSV": ".csv", + "DOCX": ".docx", + "MHTML": ".mhtml", + "PDF": ".pdf", + "PNG": ".png", + "PPTX": ".pptx", + "XLSX": ".xlsx", + "XML": ".xml", + "BMP": ".bmp", + "EMF": ".emf", + "GIF": ".gif", + "JPEG": ".jpeg", + "TIFF": ".tiff", + } + + export_format = export_format.upper() + if export_format not in validFormats: + print( + f"{icons.red_dot} The '{export_format}' format is not a valid format for exporting Power BI reports. Please enter a valid format. Options: {validFormats}" + ) + return + + fileExt = validFormats.get(export_format) + + if file_name == None: + file_name = report + fileExt + else: + file_name = file_name + fileExt + + folderPath = "/lakehouse/default/Files" + filePath = os.path.join(folderPath, file_name) + + dfI = fabric.list_items(workspace=workspace) + dfI_filt = dfI[ + (dfI["Type"].isin(["Report", "PaginatedReport"])) + & (dfI["Display Name"] == report) + ] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist in the '{workspace}' workspace." + ) + return + + reportType = dfI_filt["Type"].iloc[0] + + # Limitations + pbiOnly = ["PNG"] + paginatedOnly = [ + "ACCESSIBLEPDF", + "CSV", + "DOCX", + "BMP", + "EMF", + "GIF", + "JPEG", + "TIFF", + "MHTML", + "XLSX", + "XML", + ] + + if reportType == "Report" and export_format in paginatedOnly: + print( + f"{icons.red_dot} The '{export_format}' format is only supported for paginated reports." + ) + return + if reportType == "PaginatedReport" and export_format in pbiOnly: + print( + f"{icons.red_dot} The '{export_format}' format is only supported for Power BI reports." + ) + return + + if reportType == "PaginatedReport" and ( + bookmark_name is not None or page_name is not None or visual_name is not None + ): + print( + f"{icons.red_dot} Export for paginated reports does not support bookmarks/pages/visuals. Those parameters must not be set for paginated reports." 
+ ) + return + + reportId = dfI_filt["Id"].iloc[0] + client = fabric.PowerBIRestClient() + + dfVisual = list_report_visuals(report=report, workspace=workspace) + dfPage = list_report_pages(report=report, workspace=workspace) + + if ( + export_format in ["BMP", "EMF", "GIF", "JPEG", "TIFF"] + and reportType == "PaginatedReport" + ): + request_body = { + "format": "IMAGE", + "paginatedReportConfiguration": { + "formatSettings": {"OutputFormat": export_format.lower()} + }, + } + elif bookmark_name is None and page_name is None and visual_name is None: + request_body = {"format": export_format} + elif bookmark_name is not None: + if reportType == "Report": + request_body = { + "format": export_format, + "powerBIReportConfiguration": { + "defaultBookmark": {"name": bookmark_name} + }, + } + elif page_name is not None and visual_name is None: + if reportType == "Report": + request_body = {"format": export_format, "powerBIReportConfiguration": {}} + + request_body["powerBIReportConfiguration"]["pages"] = [] + + for page in page_name: + dfPage_filt = dfPage[dfPage["Page ID"] == page] + if len(dfPage_filt) == 0: + print( + f"{icons.red_dot} The '{page}' page does not exist in the '{report}' report within the '{workspace}' workspace." + ) + return + page_dict = {"pageName": page} + request_body["powerBIReportConfiguration"]["pages"].append(page_dict) + + elif page_name is not None and visual_name is not None: + if len(page_name) != len(visual_name): + print( + f"{icons.red_dot} Each 'visual_name' must map to a single 'page_name'." + ) + return + if reportType == "Report": + request_body = {"format": export_format, "powerBIReportConfiguration": {}} + + request_body["powerBIReportConfiguration"]["pages"] = [] + a = 0 + for page in page_name: + visual = visual_name[a] + dfVisual_filt = dfVisual[ + (dfVisual["Page ID"] == page) & (dfVisual["Visual ID"] == visual) + ] + if len(dfVisual_filt) == 0: + print( + f"{icons.red_dot} The '{visual}' visual does not exist on the '{page}' in the '{report}' report within the '{workspace}' workspace." + ) + return + page_dict = {"pageName": page, "visualName": visual} + request_body["powerBIReportConfiguration"]["pages"].append(page_dict) + a += 1 + + # Transform and add report filter if it is specified + if report_filter is not None and reportType == "Report": + reportFilter = generate_embedded_filter(filter=report_filter) + report_level_filter = {"filter": reportFilter} + + if "powerBIReportConfiguration" not in request_body: + request_body["powerBIReportConfiguration"] = {} + request_body["powerBIReportConfiguration"]["reportLevelFilters"] = [ + report_level_filter + ] + print(request_body) + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/ExportTo", + json=request_body, + ) + if response.status_code == 202: + response_body = json.loads(response.content) + exportId = response_body["id"] + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}" + ) + response_body = json.loads(response.content) + while response_body["status"] not in ["Succeeded", "Failed"]: + time.sleep(3) + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}" + ) + response_body = json.loads(response.content) + if response_body["status"] == "Failed": + print( + f"{icons.red_dot} The export for the '{report}' report within the '{workspace}' workspace in the '{export_format}' format has failed." 
+ ) + else: + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}/file" + ) + print( + f"{icons.in_progress} Saving the '{export_format}' export for the '{report}' report within the '{workspace}' workspace to the lakehouse..." + ) + with open(filePath, "wb") as export_file: + export_file.write(response.content) + print( + f"{icons.green_dot} The '{export_format}' export for the '{report}' report within the '{workspace}' workspace has been saved to the following location: '{filePath}'." + ) + + +def clone_report( + report: str, + cloned_report: str, + workspace: Optional[str] = None, + target_workspace: Optional[str] = None, + target_dataset: Optional[str] = None, +): + """ + Clones a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + cloned_report : str + Name of the new Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + target_workspace : str, default=None + The name of the Fabric workspace to place the cloned report. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + target_dataset : str, default=None + The name of the semantic model to be used by the cloned report. + Defaults to None which resolves to the semantic model used by the initial report. + """ + + # https://learn.microsoft.com/rest/api/power-bi/reports/clone-report-in-group + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + dfI = fabric.list_items(workspace=workspace, type="Report") + dfI_filt = dfI[(dfI["Display Name"] == report)] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist within the '{workspace}' workspace." + ) + return + + reportId = resolve_report_id(report, workspace) + + if target_workspace is None: + target_workspace = workspace + target_workspace_id = workspace_id + else: + dfW = fabric.list_workspaces() + dfW_filt = dfW[dfW["Name"] == target_workspace] + + if len(dfW_filt) == 0: + print(f"{icons.red_dot} The '{workspace}' is not a valid workspace.") + return + target_workspace_id = dfW_filt["Id"].iloc[0] + + if target_dataset == None: + dfR = fabric.list_reports(workspace=target_workspace) + dfR_filt = dfR[dfR["Name"] == report] + target_dataset_id = dfR_filt["Dataset Id"].iloc[0] + target_dataset = resolve_dataset_name( + dataset_id=target_dataset_id, workspace=target_workspace + ) + else: + dfD = fabric.list_datasets(workspace=target_workspace) + dfD_filt = dfD[dfD["Dataset Name"] == target_dataset] + + if len(dfD_filt) == 0: + print( + f"{icons.red_dot} The '{target_dataset}' target dataset does not exist in the '{target_workspace}' workspace." 
+ ) + return + target_dataset_id = dfD_filt["Dataset Id"].iloc[0] + + client = fabric.PowerBIRestClient() + + if target_workspace is None and target_dataset is None: + request_body = {"name": cloned_report} + elif target_workspace is not None and target_dataset is None: + request_body = {"name": cloned_report, "targetWorkspaceId": target_workspace_id} + elif target_workspace is not None and target_dataset is not None: + request_body = { + "name": cloned_report, + "targetModelId": target_dataset_id, + "targetWorkspaceId": target_workspace_id, + } + elif target_workspace is None and target_dataset is not None: + request_body = {"name": cloned_report, "targetModelId": target_dataset_id} + + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/Clone", json=request_body + ) + + if response.status_code == 200: + print( + f"{icons.green_dot} The '{report}' report has been successfully cloned as the '{cloned_report}' report within the '{target_workspace}' workspace using the '{target_dataset}' semantic model." + ) + else: + print( + f"{icons.red_dot} POST request failed with status code: {response.status_code}" + ) + + +def launch_report(report: str, workspace: Optional[str] = None): + """ + Shows a Power BI report within a Fabric notebook. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str + An embedded Power BI report within the notebook. + """ + + from .HelperFunctions import resolve_report_id + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + reportId = resolve_report_id(report, workspace) + + report = Report(group_id=workspace_id, report_id=reportId) + + return report + + +def list_report_pages(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all pages within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the pages within a Power BI report and their properties. 
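+
+ Examples
+ --------
+ A minimal usage sketch; the report and workspace names are illustrative
+ placeholders:
+
+ >>> dfPages = list_report_pages(report="Sales Overview", workspace="Contoso Workspace")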
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + df = pd.DataFrame( + columns=["Page ID", "Page Name", "Hidden", "Width", "Height", "Visual Count"] + ) + + reportJson = get_report_json(report=report, workspace=workspace) + + for section in reportJson["sections"]: + pageID = section["name"] + pageName = section["displayName"] + # pageFilters = section['filters'] + pageWidth = section["width"] + pageHeight = section["height"] + visualCount = len(section["visualContainers"]) + pageHidden = False + pageConfig = section["config"] + pageConfigJson = json.loads(pageConfig) + + try: + pageH = pageConfigJson["visibility"] + if pageH == 1: + pageHidden = True + except: + pass + + new_data = { + "Page ID": pageID, + "Page Name": pageName, + "Hidden": pageHidden, + "Width": pageWidth, + "Height": pageHeight, + "Visual Count": visualCount, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["Hidden"] = df["Hidden"].astype(bool) + intCol = ["Width", "Height", "Visual Count"] + df[intCol] = df[intCol].astype(int) + + return df + + +def list_report_visuals(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all visuals within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the visuals within a Power BI report and their properties. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + reportJson = get_report_json(report=report, workspace=workspace) + + df = pd.DataFrame(columns=["Page Name", "Page ID", "Visual ID", "Title"]) + + for section in reportJson["sections"]: + pageID = section["name"] + pageName = section["displayName"] + + for visual in section["visualContainers"]: + visualConfig = visual["config"] + visualConfigJson = json.loads(visualConfig) + visualID = visualConfigJson["name"] + + try: + title = visualConfigJson["singleVisual"]["vcObjects"]["title"][0][ + "properties" + ]["text"]["expr"]["Literal"]["Value"] + title = title[1:-1] + except: + title = "" + + new_data = { + "Page Name": pageName, + "Page ID": pageID, + "Visual ID": visualID, + "Title": title, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def list_report_bookmarks(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all bookmarks within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the bookmarks within a Power BI report and their properties. 
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + df = pd.DataFrame( + columns=[ + "Bookmark ID", + "Bookmark Name", + "Page ID", + "Visual ID", + "Visual Hidden", + ] + ) + + reportJson = get_report_json(report=report, workspace=workspace) + reportConfig = reportJson["config"] + reportConfigJson = json.loads(reportConfig) + + try: + for bookmark in reportConfigJson["bookmarks"]: + bID = bookmark["name"] + bName = bookmark["displayName"] + rptPageId = bookmark["explorationState"]["activeSection"] + + for rptPg in bookmark["explorationState"]["sections"]: + for vc in bookmark["explorationState"]["sections"][rptPg][ + "visualContainers" + ]: + vHidden = False + try: + hidden = bookmark["explorationState"]["sections"][rptPg][ + "visualContainers" + ][vc]["singleVisual"]["display"]["mode"] + if hidden == "hidden": + vHidden = True + except: + pass + + new_data = { + "Bookmark ID": bID, + "Bookmark Name": bName, + "Page ID": rptPageId, + "Visual ID": vc, + "Visual Hidden": vHidden, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + listPages = list_report_pages(report=report, workspace=workspace) + + df = pd.merge(df, listPages[["Page ID", "Page Name"]], on="Page ID", how="left") + df = df[ + [ + "Bookmark ID", + "Bookmark Name", + "Page ID", + "Page Name", + "Visual ID", + "Visual Hidden", + ] + ] + + return df + + except: + print( + f"The '{report}' report within the '{workspace}' workspace has no bookmarks." + ) + + +def translate_report_titles( + report: str, languages: Union[str, List[str]], workspace: Optional[str] = None +): + """ + Dynamically generates new Power BI reports which have report titles translated into the specified language(s). + + Parameters + ---------- + report : str + Name of the Power BI report. + languages : str, List[str] + The language code(s) in which to translate the report titles. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if isinstance(languages, str): + languages = [languages] + + for lang in languages: + language_validate(lang) + + reportJson = get_report_json(report=report, workspace=workspace) + dfV = list_report_visuals(report=report, workspace=workspace) + spark = SparkSession.builder.getOrCreate() + df = spark.createDataFrame(dfV) + columnToTranslate = "Title" + + translate = ( + Translate() + .setTextCol(columnToTranslate) + .setToLanguage(languages) + .setOutputCol("translation") + .setConcurrency(5) + ) + + transDF = ( + translate.transform(df) + .withColumn("translation", flatten(col("translation.translations"))) + .withColumn("translation", col("translation.text")) + .select("Visual ID", columnToTranslate, "translation") + ) + + df_panda = transDF.toPandas() + + i = 0 + for lang in languages: + # Clone report + language = language_validate(lang) + clonedReportName = f"{report}_{language}" + + dfRep = fabric.list_reports(workspace=workspace) + dfRep_filt = dfRep[ + (dfRep["Name"] == clonedReportName) + & (dfRep["Report Type"] == "PowerBIReport") + ] + + if len(dfRep_filt) > 0: + print( + f"{icons.yellow_dot} The '{clonedReportName}' report already exists in the '{workspace} workspace." 
+ ) + else: + clone_report( + report=report, cloned_report=clonedReportName, workspace=workspace + ) + print( + f"{icons.green_dot} The '{clonedReportName}' report has been created via clone in the '{workspace} workspace." + ) + + rptJsonTr = copy.deepcopy(reportJson) + + # Update report json file + for section in rptJsonTr["sections"]: + for visual in section["visualContainers"]: + visualConfig = visual["config"] + visualConfigJson = json.loads(visualConfig) + visualID = visualConfigJson["name"] + + df_filt = df_panda[ + (df_panda["Visual ID"] == visualID) & (df_panda["Title"] != "") + ] + + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + if len(tr) > 0: + prop = visualConfigJson["singleVisual"]["vcObjects"]["title"][ + 0 + ]["properties"]["text"]["expr"]["Literal"] + prop["Value"] = f"'{tr}'" + + visual["config"] = json.dumps(visualConfigJson) + + i += 1 + + # Post updated report json file to cloned report + update_report_from_reportjson( + report=clonedReportName, report_json=rptJsonTr, workspace=workspace + ) + print( + f"{icons.green_dot} The visual titles within the '{clonedReportName}' report within the '{workspace}' have been translated into '{language}' accordingly." + ) diff --git a/sempy_labs/ReportRebind.py b/sempy_labs/report/_report_rebind.py similarity index 67% rename from sempy_labs/ReportRebind.py rename to sempy_labs/report/_report_rebind.py index 844cc0b1..c86fecfc 100644 --- a/sempy_labs/ReportRebind.py +++ b/sempy_labs/report/_report_rebind.py @@ -3,15 +3,16 @@ from .HelperFunctions import resolve_dataset_id, resolve_report_id from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' @log -def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = None, dataset_workspace: Optional[str] = None): - +def report_rebind( + report: str, + dataset: str, + report_workspace: Optional[str] = None, + dataset_workspace: Optional[str] = None, +): """ Rebinds a report to a semantic model. @@ -32,7 +33,7 @@ def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = N Returns ------- - + """ if report_workspace == None: @@ -41,28 +42,39 @@ def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = N else: report_workspace_id = fabric.resolve_workspace_id(report_workspace) if dataset_workspace == None: - dataset_workspace = report_workspace + dataset_workspace = report_workspace client = fabric.PowerBIRestClient() - reportId = resolve_report_id(report = report, workspace = report_workspace) - datasetId = resolve_dataset_id(dataset = dataset, workspace = dataset_workspace) + reportId = resolve_report_id(report=report, workspace=report_workspace) + datasetId = resolve_dataset_id(dataset=dataset, workspace=dataset_workspace) # Prepare API - request_body = { - 'datasetId': datasetId - } + request_body = {"datasetId": datasetId} - response = client.post(f"/v1.0/myorg/groups/{report_workspace_id}/reports/{reportId}/Rebind",json=request_body) + response = client.post( + f"/v1.0/myorg/groups/{report_workspace_id}/reports/{reportId}/Rebind", + json=request_body, + ) if response.status_code == 200: - print(f"{green_dot} The '{report}' report has been successfully rebinded to the '{dataset}' semantic model.") + print( + f"{icons.green_dot} The '{report}' report has been successfully rebinded to the '{dataset}' semantic model." 
+ ) else: - print(f"{red_dot} The '{report}' report within the '{report_workspace}' workspace failed to rebind to the '{dataset}' semantic model within the '{dataset_workspace}' workspace.") + print( + f"{icons.red_dot} The '{report}' report within the '{report_workspace}' workspace failed to rebind to the '{dataset}' semantic model within the '{dataset_workspace}' workspace." + ) -@log -def report_rebind_all(dataset: str, new_dataset: str, dataset_workspace: Optional[str] = None, new_dataset_workpace: Optional[str] = None, report_workspace: Optional[str] = None): +@log +def report_rebind_all( + dataset: str, + new_dataset: str, + dataset_workspace: Optional[str] = None, + new_dataset_workpace: Optional[str] = None, + report_workspace: Optional[str] = None, +): """ Rebinds all reports in a workspace which are bound to a specific semantic model to a new semantic model. @@ -86,29 +98,34 @@ def report_rebind_all(dataset: str, new_dataset: str, dataset_workspace: Optiona The name of the Fabric workspace in which the report resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- - + """ if dataset_workspace == None: dataset_workspace_id = fabric.get_workspace_id() dataset_workspace = fabric.resolve_workspace_name(dataset_workspace_id) else: - dataset_workspace_id = fabric.resolve_workspace_id(dataset_workspace) + dataset_workspace_id = fabric.resolve_workspace_id(dataset_workspace) if new_dataset_workpace == None: new_dataset_workpace = dataset_workspace if report_workspace == None: report_workspace = dataset_workspace - + datasetId = resolve_dataset_id(dataset, dataset_workspace) - dfRep = fabric.list_reports(workspace = report_workspace) - dfRep_filt = dfRep[dfRep['Dataset Id'] == datasetId] + dfRep = fabric.list_reports(workspace=report_workspace) + dfRep_filt = dfRep[dfRep["Dataset Id"] == datasetId] for i, r in dfRep_filt.iterrows(): - rptName = r['Name'] - report_rebind(report = rptName, dataset = new_dataset, report_workspace = report_workspace, dataset_workspace = new_dataset_workpace) \ No newline at end of file + rptName = r["Name"] + report_rebind( + report=rptName, + dataset=new_dataset, + report_workspace=report_workspace, + dataset_workspace=new_dataset_workpace, + ) diff --git a/sempy_labs/shortcuts.py b/sempy_labs/shortcuts.py index 8a246bff..9be99197 100644 --- a/sempy_labs/shortcuts.py +++ b/sempy_labs/shortcuts.py @@ -1,16 +1,19 @@ import sempy_labs import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import resolve_lakehouse_name, resolve_lakehouse_id +from ._helper_functions import resolve_lakehouse_name, resolve_lakehouse_id from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def create_shortcut_onelake(table_name: str, source_lakehouse: str, source_workspace: str, destination_lakehouse: str, destination_workspace: Optional[str] = None, shortcut_name: Optional[str] = None): +def create_shortcut_onelake( + table_name: str, + source_lakehouse: str, + source_workspace: str, + destination_lakehouse: str, + destination_workspace: Optional[str] = None, + shortcut_name: Optional[str] = None, +): """ Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to a delta table in OneLake. 
@@ -28,12 +31,12 @@ def create_shortcut_onelake(table_name: str, source_lakehouse: str, source_works The name of the Fabric workspace in which the shortcut will be created. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - shortcut_name : str, default=None - The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. - + shortcut_name : str, default=None + The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. + Returns ------- - + """ sourceWorkspaceId = fabric.resolve_workspace_id(source_workspace) @@ -41,38 +44,56 @@ def create_shortcut_onelake(table_name: str, source_lakehouse: str, source_works if destination_workspace == None: destination_workspace = source_workspace - + destinationWorkspaceId = fabric.resolve_workspace_id(destination_workspace) - destinationLakehouseId = resolve_lakehouse_id(destination_lakehouse, destination_workspace) + destinationLakehouseId = resolve_lakehouse_id( + destination_lakehouse, destination_workspace + ) if shortcut_name == None: shortcut_name = table_name - + client = fabric.FabricRestClient() - tablePath = 'Tables/' + table_name + tablePath = "Tables/" + table_name request_body = { - "path": 'Tables', - "name": shortcut_name.replace(' ',''), - "target": { - "oneLake": { - "workspaceId": sourceWorkspaceId, - "itemId": sourceLakehouseId, - "path": tablePath} - } + "path": "Tables", + "name": shortcut_name.replace(" ", ""), + "target": { + "oneLake": { + "workspaceId": sourceWorkspaceId, + "itemId": sourceLakehouseId, + "path": tablePath, + } + }, } try: - response = client.post(f"/v1/workspaces/{destinationWorkspaceId}/items/{destinationLakehouseId}/shortcuts",json=request_body) + response = client.post( + f"/v1/workspaces/{destinationWorkspaceId}/items/{destinationLakehouseId}/shortcuts", + json=request_body, + ) if response.status_code == 201: - print(f"{green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace.") + print( + f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace." + ) else: print(response.status_code) except Exception as e: - print(f"{red_dot} Failed to create a shortcut for the '{table_name}' table: {e}") - -def create_shortcut(shortcut_name: str, location: str, subpath: str, source: str, connection_id: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): - + print( + f"{icons.red_dot} Failed to create a shortcut for the '{table_name}' table: {e}" + ) + + +def create_shortcut( + shortcut_name: str, + location: str, + subpath: str, + source: str, + connection_id: str, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): """ Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to an ADLS Gen2 or Amazon S3 source. @@ -88,22 +109,21 @@ def create_shortcut(shortcut_name: str, location: str, subpath: str, source: str workspace : str, default=None The name of the Fabric workspace in which the shortcut will be created. 
Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - + or if no lakehouse attached, resolves to the workspace of the notebook. + Returns ------- - + """ - source_titles = { - 'adlsGen2': 'ADLS Gen2', - 'amazonS3': 'Amazon S3' - } + source_titles = {"adlsGen2": "ADLS Gen2", "amazonS3": "Amazon S3"} sourceValues = list(source_titles.keys()) if source not in sourceValues: - print(f"{red_dot} The 'source' parameter must be one of these values: {sourceValues}.") + print( + f"{icons.red_dot} The 'source' parameter must be one of these values: {sourceValues}." + ) return sourceTitle = source_titles[source] @@ -118,32 +138,40 @@ def create_shortcut(shortcut_name: str, location: str, subpath: str, source: str lakehouse_id = fabric.get_lakehouse_id() else: lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - + client = fabric.FabricRestClient() - shortcutActualName = shortcut_name.replace(' ','') + shortcutActualName = shortcut_name.replace(" ", "") request_body = { - "path": 'Tables', - "name": shortcutActualName, - "target": { - source: { - "location": location, - "subpath": subpath, - "connectionId": connection_id} - } + "path": "Tables", + "name": shortcutActualName, + "target": { + source: { + "location": location, + "subpath": subpath, + "connectionId": connection_id, + } + }, } try: - response = client.post(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts",json=request_body) + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts", + json=request_body, + ) if response.status_code == 201: - print(f"{green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'.") + print( + f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'." + ) else: print(response.status_code) except: - print(f"{red_dot} Failed to create a shortcut for the '{shortcut_name}' table.") + print( + f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table." + ) -def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = None): """ Shows all shortcuts which exist in a Fabric lakehouse. @@ -155,8 +183,8 @@ def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = N workspace : str, default=None The name of the Fabric workspace in which lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - + or if no lakehouse attached, resolves to the workspace of the notebook. 
+ Returns ------- pandas.DataFrame @@ -168,42 +196,79 @@ def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = N workspace = fabric.resolve_workspace_name(workspace_id) else: workspace_id = fabric.resolve_workspace_id(workspace) - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) else: lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - df = pd.DataFrame(columns=['Shortcut Name', 'Shortcut Path', 'Source', 'Source Lakehouse Name', 'Source Workspace Name', 'Source Path', 'Source Connection ID', 'Source Location', 'Source SubPath']) + df = pd.DataFrame( + columns=[ + "Shortcut Name", + "Shortcut Path", + "Source", + "Source Lakehouse Name", + "Source Workspace Name", + "Source Path", + "Source Connection ID", + "Source Location", + "Source SubPath", + ] + ) client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts") + response = client.get( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts" + ) if response.status_code == 200: - for s in response.json()['value']: - shortcutName = s['name'] - shortcutPath = s['path'] - source = list(s['target'].keys())[0] - sourceLakehouseName, sourceWorkspaceName, sourcePath, connectionId, location, subpath = None, None, None, None, None, None - if source == 'oneLake': - sourceLakehouseId = s['target'][source]['itemId'] - sourcePath = s['target'][source]['path'] - sourceWorkspaceId = s['target'][source]['workspaceId'] + for s in response.json()["value"]: + shortcutName = s["name"] + shortcutPath = s["path"] + source = list(s["target"].keys())[0] + ( + sourceLakehouseName, + sourceWorkspaceName, + sourcePath, + connectionId, + location, + subpath, + ) = (None, None, None, None, None, None) + if source == "oneLake": + sourceLakehouseId = s["target"][source]["itemId"] + sourcePath = s["target"][source]["path"] + sourceWorkspaceId = s["target"][source]["workspaceId"] sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId) - sourceLakehouseName = resolve_lakehouse_name(sourceLakehouseId, sourceWorkspaceName) + sourceLakehouseName = resolve_lakehouse_name( + sourceLakehouseId, sourceWorkspaceName + ) else: - connectionId = s['target'][source]['connectionId'] - location = s['target'][source]['location'] - subpath = s['target'][source]['subpath'] - - new_data = {'Shortcut Name': shortcutName, 'Shortcut Path': shortcutPath, 'Source': source, 'Source Lakehouse Name': sourceLakehouseName, 'Source Workspace Name': sourceWorkspaceName, 'Source Path': sourcePath, 'Source Connection ID': connectionId, 'Source Location': location, 'Source SubPath': subpath} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - print(f"This function relies on an API which is not yet official as of May 21, 2024. 
Once the API becomes official this function will work as expected.") + connectionId = s["target"][source]["connectionId"] + location = s["target"][source]["location"] + subpath = s["target"][source]["subpath"] + + new_data = { + "Shortcut Name": shortcutName, + "Shortcut Path": shortcutPath, + "Source": source, + "Source Lakehouse Name": sourceLakehouseName, + "Source Workspace Name": sourceWorkspaceName, + "Source Path": sourcePath, + "Source Connection ID": connectionId, + "Source Location": location, + "Source SubPath": subpath, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + print( + f"This function relies on an API which is not yet official as of May 21, 2024. Once the API becomes official this function will work as expected." + ) return df -def delete_shortcut(shortcut_name: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def delete_shortcut( + shortcut_name: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None +): """ Deletes a shortcut. @@ -217,19 +282,19 @@ def delete_shortcut(shortcut_name: str, lakehouse: Optional[str] = None, workspa workspace : str, default=None The name of the Fabric workspace in which lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - + or if no lakehouse attached, resolves to the workspace of the notebook. + Returns ------- - - """ + + """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) else: workspace_id = fabric.resolve_workspace_id(workspace) - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) @@ -237,9 +302,13 @@ def delete_shortcut(shortcut_name: str, lakehouse: Optional[str] = None, workspa lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) client = fabric.FabricRestClient() - response = client.delete(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}") - + response = client.delete( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}" + ) + if response.status_code == 200: - print(f"{green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted.") + print( + f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted." 
+ ) else: - print(f"{red_dot} The '{shortcut_name}' has not been deleted.") \ No newline at end of file + print(f"{icons.red_dot} The '{shortcut_name}' has not been deleted.") From b8e757fd5ceaf49a1789c8000c688eecdd1b2823 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Sat, 8 Jun 2024 09:45:55 +0000 Subject: [PATCH 14/23] add intersphinx --- docs/source/conf.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 903744fb..db597126 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,17 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon'] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + "sphinx.ext.intersphinx", +] + +intersphinx_mapping = { + 'python': ('http://docs.python.org/', None), + 'numpy': ('https://numpy.org/doc/stable/', None), + 'pandas': ('http://pandas.pydata.org/pandas-docs/dev', None) +} templates_path = ['_templates'] exclude_patterns = [] From 7689c0f943886d0ffa3357ed1490b47ea5b2cf71 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 10 Jun 2024 11:12:29 +0000 Subject: [PATCH 15/23] update readme mock powerbiclient --- README.md | 3819 +------------------------------------------ docs/source/conf.py | 2 +- 2 files changed, 4 insertions(+), 3817 deletions(-) diff --git a/README.md b/README.md index dfc0df0a..1c5c9649 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # semantic-link-labs [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs) +[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.4.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs) -[![Read The Docs](https://readthedocs.org/projects//badge/?version=0.4.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) This is a python library intended to be used in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). This library was originally intended to contain functions used for [migrating semantic models to Direct Lake mode](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration-1). However, it quickly became apparent that functions within such a library could support many other useful activities in the realm of semantic models, reports, lakehouses and really anything Fabric-related. As such, this library contains a variety of functions ranging from running [Vertipaq Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#vertipaq_analyzer) or the [Best Practice Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#run_model_bpa) against a semantic model to seeing if any [lakehouse tables hit Direct Lake guardrails](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#get_lakehouse_tables) or accessing the [Tabular Object Model](https://github.com/microsoft/semantic-link-labs/#tabular-object-model-tom) and more! 
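As an aside on the intersphinx configuration added above: together with `sphinx.ext.napoleon`, the `intersphinx_mapping` lets NumPy-style type names in the library's docstrings (for example `pandas.DataFrame`) resolve to links into the external pandas documentation when the API docs are built. A minimal sketch of the docstring shape that benefits from this; the function name is hypothetical and not part of sempy_labs:

```python
# Illustrative sketch only: a NumPy-style docstring that
# sphinx.ext.napoleon parses and sphinx.ext.intersphinx cross-links.
# 'list_example_items' is a hypothetical name, not a sempy_labs API.
import pandas as pd


def list_example_items() -> pd.DataFrame:
    """
    Shows example items.

    Returns
    -------
    pandas.DataFrame
        With the intersphinx mapping in conf.py, the 'pandas.DataFrame'
        reference above renders as a link into the pandas documentation.
    """
    return pd.DataFrame(columns=["Item Name"])
```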
@@ -216,3821 +218,6 @@ An even better way to ensure the semantic-link-labs library is available in your * [is_field_parameter](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#is_field_parameter) -# Functions -## cancel_dataset_refresh -#### Cancels the refresh of a semantic model which was executed via the [Enhanced Refresh API](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh). -```python -import semantic-link-labs as labs -labs.cancel_dataset_refresh( - dataset = 'MyReport', - #request_id = None, - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **request_id** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The [request id](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh#response-properties) of a semantic model refresh. Defaults to finding the latest active refresh of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## check_fallback_reason -#### Shows the reason a table in a Direct Lake semantic model would fallback to Direct Query. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.check_fallback_reason( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> Pandas dataframe showing the tables in the semantic model and their fallback reason. - ---- -## clear_cache -#### Clears the cache of a semantic model. -```python -import semantic-link-labs as labs -labs.clear_cache( - dataset = 'AdventureWorks', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## clone_report -#### Makes a clone of a Power BI report -```python -import semantic-link-labs as labs -labs.clone_report( - report = 'MyReport', - cloned_report = 'MyNewReport', - #workspace = None, - #target_workspace = None, - #target_dataset = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report to be cloned. -> -> **cloned_report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the new report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the original report resides. -> -> **target_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the new report will reside. Defaults to using the workspace in which the original report resides. 
-> -> **target_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The semantic model from which the new report will be connected. Defaults to using the semantic model used by the original report. -### Returns -> A printout stating the success/failure of the operation. - ---- -## control_fallback -#### Set the DirectLakeBehavior for a semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.control_fallback( - dataset = 'AdventureWorks', - direct_lake_behavior = 'DirectLakeOnly', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **direct_lake_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Setting for Direct Lake Behavior. Options: ('Automatic', 'DirectLakeOnly', 'DirectQueryOnly'). -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_blank_semantic_model -#### Creates a new blank semantic model (no tables/columns etc.). -```python -import semantic-link-labs as labs -labs.create_blank_semantic_model( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **compatibility_level** [int](https://docs.python.org/3/library/functions.html#int) -> ->> Optional; Setting for the compatibility level of the semantic model. Default value: 1605. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_pqt_file -#### Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. -```python -import semantic-link-labs as labs -labs.create_pqt_file( - dataset = 'AdventureWorks', - #file_name = 'PowerQueryTemplate', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **file_name** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Optional; TName of the Power Query Template (.pqt) file to be created. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_report_from_reportjson -#### Creates a report based on a report.json file (and an optional themes.json file). -```python -import semantic-link-labs as labs -labs.create_report_from_reportjson( - report = 'MyReport', - dataset = 'AdventureWorks', - report_json = '', - #theme_json = '', - #workspace = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to connect to the report. 
-> -> **report_json** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The report.json file to be used to create the report. -> -> **theme_json** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The theme.json file to be used for the theme of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_semantic_model_from_bim -#### Creates a new semantic model based on a Model.bim file. -```python -import semantic-link-labs as labs -labs.create_semantic_model_from_bim( - dataset = 'AdventureWorks', - bim_file = '', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **bim_file** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The model.bim file to be used to create the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_shortcut_onelake -#### Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to a delta table in OneLake. -```python -import semantic-link-labs as labs -labs.create_shortcut_onelake( - table_name = 'DimCalendar', - source_lakehouse = 'Lakehouse1', - source_workspace = 'Workspace1', - destination_lakehouse = 'Lakehouse2', - #destination_workspace = '', - shortcut_name = 'Calendar' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The table name for which a shortcut will be created. -> -> **source_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The lakehouse in which the table resides. -> -> **sourceWorkspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The workspace where the source lakehouse resides. -> -> **destination_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The lakehouse where the shortcut will be created. -> -> **destination_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the shortcut will be created. Defaults to the 'sourceWorkspaceName' parameter value. -> -> **shortcut_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the shortcut 'table' to be created. This defaults to the 'tableName' parameter value. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_warehouse -#### Creates a warehouse in Fabric. -```python -import semantic-link-labs as labs -labs.create_warehouse( - warehouse = 'MyWarehouse', - workspace = None - ) -``` -### Parameters -> **warehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the warehouse. -> -> **description** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Optional; Description of the warehouse. 
-> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the warehouse will reside. -### Returns -> A printout stating the success/failure of the operation. - ---- -## delete_shortcut -#### Deletes a [OneLake shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts). -```python -import semantic-link-labs as labs -labs.delete_shortcut( - shortcut_name = 'DimCalendar', - lakehouse = 'Lakehouse1', - workspace = 'Workspace1' - ) -``` -### Parameters -> **shortcut_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the OneLake shortcut to delete. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse in which the shortcut resides. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## direct_lake_schema_compare -#### Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.direct_lake_schema_compare( - dataset = 'AdventureWorks', - workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse resides. -> -### Returns -> Shows tables/columns which exist in the semantic model but do not exist in the corresponding lakehouse. - ---- -## direct_lake_schema_sync -#### Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model). -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.direct_lake_schema_sync( - dataset = 'AdvWorks', - add_to_model = True, - #workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **add_to_model** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds columns which exist in the lakehouse but do not exist in the semantic model. No new tables are added. Default value: False. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse resides. 
-> -### Returns -> A list of columns which exist in the lakehouse but not in the Direct Lake semantic model. If 'add_to_model' is set to True, a printout stating the success/failure of the operation is returned. - ---- -## export_model_to_onelake -#### Exports a semantic model's tables to delta tables in the lakehouse. Creates shortcuts to the tables if a lakehouse is specified. -> [!IMPORTANT] -> This function requires: -> -> [XMLA read/write](https://learn.microsoft.com/power-bi/enterprise/service-premium-connect-tools#enable-xmla-read-write) to be enabled on the Fabric capacity. -> -> [OneLake Integration](https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration) feature to be enabled within the semantic model settings. -```python -import semantic-link-labs as labs -labs.export_model_to_onelake( - dataset = 'AdventureWorks', - workspace = None, - destination_lakehouse = 'Lakehouse2', - destination_workspace = 'Workspace2' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **destination_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created. -> -> **destination_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse resides. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## export_report -#### Exports a Power BI report to a file in your lakehouse. -```python -import semantic-link-labs as labs -labs.export_report( - report = 'AdventureWorks', - export_format = 'PDF', - #file_name = None, - #bookmark_name = None, - #page_name = None, - #visual_name = None, - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.export_report( - report = 'AdventureWorks', - export_format = 'PDF', - #file_name = 'Exports\MyReport', - #bookmark_name = None, - #page_name = 'ReportSection293847182375', - #visual_name = None, - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.export_report( - report = 'AdventureWorks', - export_format = 'PDF', - #page_name = 'ReportSection293847182375', - #report_filter = "'Product Category'[Color] in ('Blue', 'Orange') and 'Calendar'[CalendarYear] <= 2020", - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.export_report( - report = 'AdventureWorks', - export_format = 'PDF', - #page_name = ['ReportSection293847182375', 'ReportSection4818372483347'], - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.export_report( - report = 'AdventureWorks', - export_format = 'PDF', - #page_name = ['ReportSection293847182375', 'ReportSection4818372483347'], - #visual_name = ['d84793724739', 'v834729234723847'], - #workspace = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **export_format** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The format in which to export the report. 
See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'. -> -> **file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the file to be saved within the lakehouse. Do **not** include the file extension. Defaults ot the reportName parameter value. -> -> **bookmark_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name (GUID) of a bookmark within the report. -> -> **page_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name (GUID) of the report page. -> -> **visual_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter. -> -> **report_filter** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the report resides. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## generate_embedded_filter -#### Runs a DAX query against a semantic model. -```python -import semantic-link-labs as labs -labs.generate_embedded_filter( - filter = "'Product'[Product Category] = 'Bikes' and 'Geography'[Country Code] in (3, 6, 10)" - ) -``` -### Parameters -> **filter** [str](https://docs.python.org/3/library/stdtypes.html#str) -### Returns -> A string converting the filter into an [embedded filter](https://learn.microsoft.com/power-bi/collaborate-share/service-url-filters) - ---- -## get_direct_lake_guardrails -#### Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query based on Microsoft's online documentation. -```python -import semantic-link-labs as labs -labs.get_direct_lake_guardrails() -``` -### Parameters -None -### Returns -> A table showing the Direct Lake guardrails by SKU. - ---- -## get_directlake_guardrails_for_sku -#### Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity. -*Use the result of the 'get_sku_size' function as an input for this function's skuSize parameter.* -```python -import semantic-link-labs as labs -labs.get_directlake_guardrails_for_sku( - sku_size = '' - ) -``` -### Parameters -> **sku_size** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Sku size of a workspace/capacity -### Returns -> A table showing the Direct Lake guardrails for the given SKU. - ---- -## get_direct_lake_lakehouse -#### Identifies the lakehouse used by a Direct Lake semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.get_direct_lake_lakehouse( - dataset = 'AdventureWorks', - #workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. 
-> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Name of the lakehouse used by the semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. - ---- -## get_direct_lake_sql_endpoint -#### Identifies the lakehouse used by a Direct Lake semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.get_direct_lake_sql_endpoint( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A string containing the SQL Endpoint ID for a Direct Lake semantic model. - ---- -## get_lakehouse_columns -#### Shows the tables and columns of a lakehouse and their respective properties. -```python -import semantic-link-labs as labs -labs.get_lakehouse_columns( - lakehouse = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A pandas dataframe showing the tables/columns within a lakehouse and their properties. - ---- -## get_lakehouse_tables -#### Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. -```python -import semantic-link-labs as labs -labs.get_lakehouse_tables( - lakehouse = 'MyLakehouse', - workspace = 'NewWorkspace', - extended = True, - count_rows = True) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -> -> **extended** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds the following additional table properties \['Files', 'Row Groups', 'Table Size', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail'\]. Also indicates the SKU for the workspace and whether guardrails are hit. Default value: False. -> -> **count_rows** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds an additional column showing the row count of each table. Default value: False. -> -> **export** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; If specified as True, the resulting dataframe will be exported to a delta table in your lakehouse. -### Returns -> A pandas dataframe showing the delta tables within a lakehouse and their properties. 
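The `export` option documented above is not exercised in the example; a small sketch of combining it with the other flags (lakehouse and workspace names are placeholders, and the top-level import is an assumption):

```python
import sempy_labs as labs  # assumes the helper is importable from the top-level package

# Placeholder names; per the parameter descriptions above, export=True also
# writes the resulting dataframe to a delta table in the attached lakehouse.
df = labs.get_lakehouse_tables(
    lakehouse='MyLakehouse',
    workspace='MyWorkspace',
    extended=True,
    count_rows=True,
    export=True,
)
df.head()
```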
- ---- -## get_measure_dependencies -#### Shows all dependencies for all measures in a semantic model -```python -import semantic-link-labs as labs -labs.get_measure_dependencies( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing all dependencies for all measures in the semantic model. - ---- -## get_model_calc_dependencies -#### Shows all dependencies for all objects in a semantic model -```python -import semantic-link-labs as labs -labs.get_model_calc_dependencies( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing all dependencies for all objects in the semantic model. - ---- -## get_object_level_security -#### Shows a list of columns used in object level security. -```python -import semantic-link-labs as labs -labs.get_object_level_security( - dataset = 'AdventureWorks', - workspace = '') -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The semantic model name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing the columns used in object level security within a semantic model. - ---- -## get_report_json -#### Gets the report.json file content of a Power BI report. -```python -import semantic-link-labs as labs -labs.get_report_json( - report = 'MyReport', - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.get_report_json( - report = 'MyReport', - #workspace = None, - save_to_file_name = 'MyFileName' - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the report resides. -> -> **save_to_file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Specifying this parameter will save the report.json file to your lakehouse with the file name of this parameter. -### Returns -> The report.json file for a given Power BI report. - ---- -## get_semantic_model_bim -#### Extracts the Model.bim file for a given semantic model. -```python -import semantic-link-labs as labs -labs.get_semantic_model_bim( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.get_semantic_model_bim( - dataset = 'AdventureWorks', - #workspace = None, - save_to_file_name = 'MyFileName' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. 
-> -> **save_to_file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Specifying this parameter will save the model.bim file to your lakehouse with the file name of this parameter. -### Returns -> The model.bim file for a given semantic model. - ---- -## get_shared_expression -#### Dynamically generates the M expression used by a Direct Lake model for a given lakehouse. -```python -import semantic-link-labs as labs -labs.get_shared_expression( - lakehouse = '', - #workspace = '' - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A string showing the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint. - ---- -## get_sku_size -#### Shows the SKU size for a workspace. -```python -import semantic-link-labs as labs -labs.get_sku_size( - workspace = '' - ) -``` -### Parameters -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A string containing the SKU size for a workspace. - ---- -## import_vertipaq_analyzer -#### Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse. -```python -import semantic-link-labs as labs -labs.import_vertipaq_analyzer( - folder_path = '/lakehouse/default/Files/VertipaqAnalyzer', - file_name = 'Workspace Name-DatasetName.zip' - ) -``` -### Parameters -> **folder_path** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved. -> -> **file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; File name of the file which contains the vertipaq analyzer info. - ---- -## launch_report -#### Shows a Power BI report within a Fabric notebook. -```python -import semantic-link-labs as labs -labs.launch_report( - report = 'MyReport', - #workspace = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the workspace in which the report resides. - ---- -## list_dashboards -#### Shows the dashboards within the workspace. -```python -import semantic-link-labs as labs -labs.list_dashboards( - #workspace = '' - ) -``` -### Parameters -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace name. -### Returns -> A pandas dataframe showing the dashboards which exist in the workspace. - ---- -## list_dataflow_storage_accounts -#### Shows the dataflow storage accounts. -```python -import semantic-link-labs as labs -labs.list_dataflow_storage_accounts() -``` -### Parameters -None -### Returns -> A pandas dataframe showing the accessible dataflow storage accounts. -> ---- -## list_direct_lake_model_calc_tables -#### Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. 
-```python
-import semantic-link-labs as labs
-labs.list_direct_lake_model_calc_tables(
-    dataset = 'AdventureWorks',
-    #workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A pandas dataframe showing the calculated tables which were migrated to Direct Lake and whose DAX expressions are stored as model annotations.
-
----
-## list_lakehouses
-#### Shows the properties associated with lakehouses in a workspace.
-```python
-import semantic-link-labs as labs
-labs.list_lakehouses(
-    workspace = None
-    )
-```
-### Parameters
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A pandas dataframe showing the properties of all lakehouses in a workspace.
-
----
-## list_semantic_model_objects
-#### Shows a list of semantic model objects.
-```python
-import semantic-link-labs as labs
-labs.list_semantic_model_objects(
-    dataset = 'AdvWorks',
-    workspace = None
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the import/DirectQuery semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A dataframe showing a list of objects in the semantic model.
-
----
-## list_shortcuts
-#### Shows the shortcuts within a lakehouse (*note: the API behind this function is not yet available. The function will work as expected once the API is officially released*).
-```python
-import semantic-link-labs as labs
-labs.list_shortcuts(
-    lakehouse = 'MyLakehouse',
-    #workspace = ''
-    )
-```
-### Parameters
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Name of the lakehouse.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A pandas dataframe showing the shortcuts which exist in a given lakehouse and their properties.
-
----
-## list_warehouses
-#### Shows the warehouses within a workspace.
-```python
-import semantic-link-labs as labs
-labs.list_warehouses(
-    #workspace = None
-    )
-```
-### Parameters
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace name.
-### Returns
-> A pandas dataframe showing the warehouses which exist in a given workspace and their properties.
-
----
-## measure_dependency_tree
-#### Shows a measure dependency tree of all dependent objects for a measure in a semantic model.
-```python
-import semantic-link-labs as labs
-labs.measure_dependency_tree(
-    dataset = 'AdventureWorks',
-    measure_name = 'Sales Amount',
-    #workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **measure_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the measure to use for building a dependency tree.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns -> A tree view showing the dependencies for a given measure within the semantic model. - ---- -## migrate_calc_tables_to_lakehouse -#### Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_calc_tables_to_lakehouse( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_calc_tables_to_semantic_model -#### Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_calc_tables_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_field_parameters -#### Migrates field parameters from one semantic model to another. 
-> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_field_parameters( - dataset = 'AdventureWorks', - new_dataset = '', - #workspace = '', - #new_dataset_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_model_objects_to_semantic_model -#### Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_model_objects_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = '', - #workspace = '', - #new_dataset_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_tables_columns_to_semantic_model -#### Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_tables_columns_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. 
-> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migration_validation -#### Shows the objects in the original semantic model and whether then were migrated successfully or not. -```python -import semantic-link-labs as labs -labs.migration_validation( - dataset = 'AdvWorks', - new_dataset = 'AdvWorksDL', - workspace = None, - new_dataset_workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. - ---- -## model_bpa_rules -#### Shows the default Best Practice Rules for the semantic model used by the [run_model_bpa](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#run_model_bpa) function -```python -import semantic-link-labs as labs -labs.model_bpa_rules() -``` -### Returns -> A pandas dataframe showing the default semantic model best practice rules. - ---- -## optimize_lakehouse_tables -#### Runs the [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) function over the specified lakehouse tables. -```python -import semantic-link-labs as labs -labs.optimize_lakehouse_tables( - tables = ['Sales', 'Calendar'], - #lakehouse = None, - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.optimize_lakehouse_tables( - tables = None, - #lakehouse = 'MyLakehouse', - #workspace = None - ) -``` -### Parameters -> **tables** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name(s) of the lakehouse delta table(s) to optimize. If 'None' is entered, all of the delta tables in the lakehouse will be queued to be optimized. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Name of the lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## refresh_calc_tables -#### Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.refresh_calc_tables( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. 
-### Returns -> A printout stating the success/failure of the operation. - ---- -## refresh_semantic_model -#### Performs a refresh on a semantic model. -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - refresh_type = 'full', - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - tables = ['Sales', 'Geography'], - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - partitions = ["'Sales'[Sales - 2024]", "'Sales'[Sales - 2023]"], - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - tables = ['Geography'], - partitions = ["'Sales'[Sales - 2024]", "'Sales'[Sales - 2023]"], - workspace = None -) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. If no tables/partitions are specified, the entire semantic model is refreshed. -> -> **tables** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Tables to refresh. -> -> **partitions** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Partitions to refresh. Must be in "'Table'[Partition]" format. -> -> **refresh_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Type of processing to perform. Options: ('full', 'automatic', 'dataOnly', 'calculate', 'clearValues', 'defragment'). Default value: 'full'. -> -> **retry_count** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; Number of retry attempts. Default is 0. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## report_rebind -#### Rebinds a report to a semantic model. -```python -import semantic-link-labs as labs -labs.report_rebind( - report = '', - dataset = '', - #report_workspace = '', - #dataset_workspace = '' - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to rebind to the report. -> -> **report_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the report resides. -> -> **dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## report_rebind_all -#### Rebinds all reports in a workspace which are bound to a specific semantic model to a new semantic model. 
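-A common use is the final step of a Direct Lake migration, where every report bound to the original import/DirectQuery model is pointed at the new Direct Lake model. A minimal sketch of that scenario (the model and workspace names are illustrative, and the import assumes the sempy_labs package that the semantic-link-labs distribution installs); the generic call and its parameters follow below:
-```python
-import sempy_labs as labs  # assumption: the 'semantic-link-labs' distribution installs the sempy_labs package
-
-# Rebind every report in the report workspace that is currently bound to the
-# import/DirectQuery model so that it points to the new Direct Lake model instead.
-labs.report_rebind_all(
-    dataset = 'AdventureWorks',               # existing import/DirectQuery semantic model (illustrative)
-    new_dataset = 'AdventureWorksDL',         # new Direct Lake semantic model (illustrative)
-    dataset_workspace = 'Sales Workspace',    # illustrative workspace names
-    new_dataset_workspace = 'Sales Workspace',
-    report_workspace = 'Sales Workspace'
-    )
-```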
-```python -import semantic-link-labs as labs -labs.report_rebind_all( - dataset = '', - new_dataset = '', - #dataset_workspace = '' , - #new_dataset_workspace = '' , - #report_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model currently binded to the reports. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to rebind to the reports. -> -> **dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the original semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the new semantic model resides. -> -> **report_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the reports reside. -### Returns -> A printout stating the success/failure of the operation. - ---- -## resolve_lakehouse_name -#### Returns the name of the lakehouse for a given lakehouse Id. -```python -import semantic-link-labs as labs -labs.resolve_lakehouse_name( - lakehouse_id = '', - #workspace = '' - ) -``` -### Parameters -> **lakehouse_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID) -> ->> Required; UUID object representing a lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A string containing the lakehouse name. - ---- -## resolve_lakehouse_id -#### Returns the ID of a given lakehouse. -```python -import semantic-link-labs as labs -labs.resolve_lakehouse_id( - lakehouse = 'MyLakehouse', - #workspace = '' - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A string conaining the lakehouse ID. - ---- -## resolve_dataset_id -#### Returns the ID of a given semantic model. -```python -import semantic-link-labs as labs -labs.resolve_dataset_id( - dataset = 'MyReport', - #workspace = '' - ) -``` -### Parameters -> **datasetName** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspaceName** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A string containing the semantic model ID. - ---- -## resolve_dataset_name -#### Returns the name of a given semantic model ID. -```python -import semantic-link-labs as labs -labs.resolve_dataset_name( - dataset_id = '', - #workspace = '' - ) -``` -### Parameters -> **dataset_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID) -> ->> Required; UUID object representing a semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A string containing the semantic model name. - ---- -## resolve_report_id -#### Returns the ID of a given report. 
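-The resolve_* helpers are handy when a log message or REST call needs an ID rather than a name. A short sketch pairing resolve_report_id with resolve_report_name (values are illustrative, and the import assumes the sempy_labs package that the semantic-link-labs distribution installs); the basic lookup is shown below:
-```python
-import sempy_labs as labs  # assumption: the 'semantic-link-labs' distribution installs the sempy_labs package
-
-# Name -> ID, then ID -> name again as a round-trip sanity check.
-report_id = labs.resolve_report_id(report = 'MyReport')
-report_name = labs.resolve_report_name(report_id = report_id)
-print(report_id, report_name)
-```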
-```python
-import semantic-link-labs as labs
-labs.resolve_report_id(
-    report = 'MyReport',
-    #workspace = ''
-    )
-```
-### Parameters
-> **report** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the report.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the report resides.
-### Returns
-> A string containing the report ID.
-
----
-## resolve_report_name
-#### Returns the name of a given report ID.
-```python
-import semantic-link-labs as labs
-labs.resolve_report_name(
-    report_id = '',
-    #workspace = ''
-    )
-```
-### Parameters
-> **report_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID)
->
->> Required; UUID object representing a report.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the report resides.
-### Returns
-> A string containing the report name.
-
----
-## run_dax
-#### Runs a DAX query against a semantic model.
-```python
-import semantic-link-labs as labs
-labs.run_dax(
-    dataset = 'AdventureWorks',
-    dax_query = 'EVALUATE SUMMARIZECOLUMNS("Sales Amount", [Sales Amount])',
-    #user_name = None,
-    #workspace = None
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **dax_query** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The DAX query to be executed.
->
-> **user_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The user name (effective identity) to use when executing the DAX query.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A pandas dataframe with the results of the DAX query.
-
----
-## run_model_bpa
-#### Runs the Best Practice Rules against a semantic model.
-```python
-import semantic-link-labs as labs
-labs.run_model_bpa(
-    dataset = 'AdventureWorks',
-    #workspace = None
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **rules_dataframe**
->
->> Optional; A pandas dataframe including rules to be analyzed.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **return_dataframe** [bool](https://docs.python.org/3/library/stdtypes.html#bool)
->
->> Optional; Returns a pandas dataframe instead of the visualization.
->
-> **export** [bool](https://docs.python.org/3/library/stdtypes.html#bool)
->
->> Optional; Exports the results to a delta table in the lakehouse.
-### Returns
-> A visualization showing objects which violate each [Best Practice Rule](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) by rule category.
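-Since run_model_bpa also accepts return_dataframe and export (see the parameters above), the results can be captured or persisted instead of visualized. A minimal sketch (the import assumes the sempy_labs package that the semantic-link-labs distribution installs):
-```python
-import sempy_labs as labs  # assumption: the 'semantic-link-labs' distribution installs the sempy_labs package
-
-# Capture the Best Practice Analyzer results as a pandas dataframe instead of the visualization.
-bpa_df = labs.run_model_bpa(
-    dataset = 'AdventureWorks',
-    return_dataframe = True
-    )
-print(bpa_df.head())
-
-# Or persist the results to a delta table in the attached lakehouse.
-labs.run_model_bpa(
-    dataset = 'AdventureWorks',
-    export = True
-    )
-```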
- ---- -## save_as_delta_table -#### Saves a dataframe as a delta table in the lakehouse -```python -import semantic-link-labs as labs -labs.save_as_delta_table( - dataframe = df, - delta_table_name = 'MyNewTable', - write_mode = 'overwrite', - lakehouse = None, - workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.save_as_delta_table( - dataframe = df, - delta_table_name = 'MyNewTable', - write_mode = 'append', - lakehouse = None, - workspace = None - ) -``` -### Parameters -> **dataframe** [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) -> ->> Required; The dataframe to save as a delta table. -> -> **delta_table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the delta table to save the dataframe. -> -> **write_mode** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Options: 'append' or 'overwrite'. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional: The name of the lakehouse in which the delta table will be saved. Defaults to the default lakehouse attached to the notebook. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. Defaults to the workspace in which the notebook resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## show_unsupported_direct_lake_objects -#### Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). -```python -import semantic-link-labs as labs -labs.show_unsupported_direct_lake_objects( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> 3 [pandas dataframes](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) showing objects (tables/columns/relationships) within the semantic model which are currently not supported by Direct Lake mode. - ---- -## translate_semantic_model -#### Translates names, descriptions, display folders for all objects in a semantic model. -```python -import semantic-link-labs as labs -labs.translate_semantic_model( - dataset = 'AdventureWorks', - languages = ['it_IT', 'fr-FR'], - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.translate_semantic_model( - dataset = 'AdventureWorks', - languages = ['it_IT', 'fr-FR'], - exclude_characters = '_-', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **languages** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; [Language code(s)](https://learn.microsoft.com/azure/ai-services/translator/language-support) to translate. -> -> **exclude_characters** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Any character in this string will be replaced by a space when given to the AI translator. 
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## update_direct_lake_model_lakehouse_connection
-#### Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse.
-> [!NOTE]
-> This function is only relevant to semantic models in Direct Lake mode.
-```python
-import semantic-link-labs as labs
-labs.update_direct_lake_model_lakehouse_connection(
-    dataset = '',
-    #lakehouse = '',
-    #workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Name of the lakehouse.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## update_direct_lake_partition_entity
-#### Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse.
-> [!NOTE]
-> This function is only relevant to semantic models in Direct Lake mode.
-```python
-import semantic-link-labs as labs
-labs.update_direct_lake_partition_entity(
-    dataset = 'AdventureWorks',
-    table_name = 'Internet Sales',
-    entity_name = 'FACT_InternetSales',
-    #workspace = '',
-    #lakehouse = '',
-    #lakehouse_workspace = ''
-    )
-```
-```python
-import semantic-link-labs as labs
-labs.update_direct_lake_partition_entity(
-    dataset = 'AdventureWorks',
-    table_name = ['Internet Sales', 'Geography'],
-    entity_name = ['FACT_InternetSales', 'DimGeography'],
-    #workspace = '',
-    #lakehouse = '',
-    #lakehouse_workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the table in the semantic model.
->
-> **entity_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the lakehouse table to be mapped to the semantic model table.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Name of the lakehouse.
->
-> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## update_item
-#### Updates the name and/or description of a Fabric item.
-```python -import semantic-link-labs as labs -labs.update_item( - item_type = 'Lakehouse', - current_name = 'MyLakehouse', - new_name = 'MyNewLakehouse', - #description = 'This is my new lakehouse', - #workspace = None - ) -``` -### Parameters -> **item_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Type of item to update. Valid options: 'DataPipeline', 'Eventstream', 'KQLDatabase', 'KQLQueryset', 'Lakehouse', 'MLExperiment', 'MLModel', 'Notebook', 'Warehouse'. -> -> **current_name** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Required; Current name of the item. -> -> **new_name** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Required; New name of the item. -> -> **description** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Optional; New description of the item. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the item resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## vertipaq_analyzer -#### Extracts the vertipaq analyzer statistics from a semantic model. -```python -import semantic-link-labs as labs -labs.vertipaq_analyzer( - dataset = 'AdventureWorks', - #workspace = '', - export = None - ) -``` - -```python -import semantic-link-labs as labs -labs.vertipaq_analyzer( - dataset = 'AdventureWorks', - #workspace = '', - export = 'zip' - ) -``` - -```python -import semantic-link-labs as labs -labs.vertipaq_analyzer( - dataset = 'AdventureWorks', - #workspace = '', - export = 'table' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **export** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the [import_vertipaq_analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#import_vertipaq_analyzer) function. Specifying 'table' will export the results to delta tables (appended) in your lakehouse. Default value: None. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse used by a Direct Lake semantic model resides. -> -> **read_stats_from_data** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse). -### Returns -> A visualization of the Vertipaq Analyzer statistics. - ---- -## warm_direct_lake_cache_perspective -#### Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.warm_direct_lake_cache_perspective( - dataset = 'AdventureWorks', - perspective = 'WarmCache', - add_dependencies = True, - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. 
-> -> **perspective** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the perspective which contains objects to be used for warming the cache. -> -> **add_dependencies** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Includes object dependencies in the cache warming process. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## warm_direct_lake_cache_isresident -#### Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.warm_direct_lake_cache_isresident( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. ---- - -# semantic-link-labs.TOM Functions - -## connect_semantic_model -#### Forms the connection to the Tabular Object Model (TOM) for a semantic model -```python -with connect_semantic_model(dataset ='AdventureWorks', workspace = None, readonly = True) as tom: -``` -```python -with connect_semantic_model(dataset ='AdventureWorks', workspace = None, readonly = False) as tom: -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the workspace in which the semantic model resides. Defaults to the workspace in which the notebook resides. -> -> **readonly** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting this to true uses a read only mode of TOM. Setting this to false enables read/write and saves any changes made to the semantic model. Default value: True. - -## add_calculated_column -#### Adds a calculated column to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_column( - table_name = 'Segment', - column_name = 'Business Segment', - expression = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table where the column will be added. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculated column. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression for the calculated column. -> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the calculated column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The formats strinf for the column. 
-> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_calculated_table -#### Adds a calculated table to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_table( - name = 'Segment', - expression = '' - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression for the table. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the table. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the table. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the table to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_calculated_table_column -#### Adds a calculated table column to a calculated table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_table_column( - table_name = 'Segment', - column_name = 'Business Segment', - source_column = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column will reside. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **source_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The source column for the column. -> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The format string of the column. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. 
-> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_calculation_group -#### Adds a calculation group to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculation_group( - name = 'Segment', - precedence = 1 - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculation group. -> -> **precedence** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the calculation group. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the calculation group to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_calculation_item -#### Adds a calculation item to a calculation group within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculation_item( - table_name = 'Segment', - calculation_item_name = 'YTD' - expression = "CALCULATE(SELECTEDMEASURE(), DATESYTD('Date'[Date]))" - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -> **calculation_item_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculation item. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression encapsulating the logic of the calculation item. -> -> **ordinal** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; -> -> **format_string_expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the calculation item. -> -### Returns -> - ---- -## add_data_column -#### Adds a data column to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_data_column( - table_name = 'Segment', - column_name = 'Business Segment', - source_column = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column will exist. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **source_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column in the source. 
-> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The format string of the column. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_entity_partition -#### Adds an entity partition to a table in a semantic model. Entity partitions are used for tables within Direct Lake semantic models. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_entity_partition( - table_name = 'Sales', - entity_name = 'Fact_Sales' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which to place the entity partition. -> -> **entity_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the lakehouse table. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The expression to use for the partition. This defaults to using the existing 'DatabaseQuery' expression within the Direct Lake semantic model. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the partition. -> -### Returns -> - ---- -## add_expression -#### Adds an expression to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_expression( - name = 'DatabaseQuery', - expression = 'let...' - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the expression. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The M-code encapsulating the logic for the expression. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the expression. -> -### Returns -> - ---- -## add_field_parameter -#### Adds a field parameter to a semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_field_parameter( - table_name = 'Segment', - objects = ["'Product'[Product Category]", "[Sales Amount]", "'Geography'[Country]"] - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the field parameter. -> -> **objects** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; A list of columns/tables to place in the field parameter. Columns must be fully qualified (i.e. "'Table Name'[Column Name]" and measures must be unqualified (i.e. "[Measure Name]"). -> -### Returns -> - ---- -## add_hierarchy -#### Adds a hierarchy to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_hierarchy( - table_name = 'Geography', - hierarchy_name = 'Geo Hierarchy', - columns = ['Continent', 'Country', 'City'] - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the hierarchy will reside. -> -> **hierarchy_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the hierarchy. -> -> **columns** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; A list of columns to use in the hierarchy. Must be ordered from the top of the hierarchy down (i.e. ["Continent", "Country", "City"]). -> -> **levels** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; A list of levels to use in the hierarchy. These will be the displayed name (instead of the column names). If omitted, the levels will default to showing the column names. -> -> **hierarchy_description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the hierarchy. -> -> **hierarchy_hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the hierarchy to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_m_partition -#### Adds an M-partition to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_m_partiiton( - table_name = 'Segment', - partition_name = 'Segment', - expression = 'let...', - mode = 'Import' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the partition will reside. -> -> **partition_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the M partition. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The M-code encapsulating the logic of the partition. -> -> **mode** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The storage mode for the partition. 
Default value: 'Import'. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the partition. -> -### Returns -> - ---- -## add_measure -#### Adds a measure to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_measure( - table_name = 'Sales', - measure_name = 'Sales Amount', - expression = "SUM('Sales'[SalesAmount])", - format_string = '$,00' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the measure will reside. -> -> **measure_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the measure. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression encapsulating the logic of the measure. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The format string of the measure. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the measure to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the measure. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the measure. -> -### Returns -> - ---- -## add_perspective -#### Adds a perspective to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_perspective( - perspective_name = 'Marketing' - ) -``` -### Parameters -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## add_relationship -#### Adds a relationship to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_relationship( - from_table = 'Sales', - from_column = 'ProductKey', - to_table = 'Product', - to_column = 'ProductKey', - from_cardinality = 'Many', - to_cardinality = 'One', - is_active = True - ) -``` -### Parameters -> **from_table** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table on the 'from' side of the relationship. -> -> **from_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column on the 'from' side of the relationship. -> -> **to_table** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table on the 'to' side of the relationship. -> -> **to_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column on the 'to' side of the relationship. -> -> **from_cardinality** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. 
-> -> **to_cardinality** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. -> -> **cross_filtering_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). Default value: 'Automatic'. -> -> **is_active** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting for whether the relationship is active or not. Default value: True. -> -> **security_filtering_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). Default value: 'OneDirection'. -> -> **rely_on_referential_integrity** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; ; Setting for the rely on referential integrity of the relationship. Default value: False. -> -### Returns -> - ---- -## add_role -#### Adds a role to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_role( - role_name = 'Reader' - ) -``` -### Parameters -> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the role. -> -> **model_permission** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The [model permission](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.modelpermission?view=analysisservices-dotnet) of the role. Default value: 'Reader'. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the role. -> -### Returns -> - ---- -## add_table -#### Adds a table to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_table( - name = 'Sales', - description = 'This is the sales table.', - hidden = False - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The descrition of the table. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the table. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the table to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_to_perspective -#### Adds an object to a perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_to_perspective( - object = tom.model.Tables['Sales'].Measures['Sales Amount'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. 
-> -### Returns -> - ---- -## add_translation -#### Adds a translation language to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_translation( - language = 'it-IT' - ) -``` -### Parameters -> **language** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support) to add to the semantic model. -> -### Returns -> - ---- -## all_calculation_items -#### Outputs a list of all calculation items within all calculation groups in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for c in tom.all_calculation_items(): - print(c.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_columns -#### Outputs a list of all columns within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for c in tom.all_columns(): - print(c.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_hierarchies -#### Outputs a list of all hierarchies within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for h in tom.all_hierarchies(): - print(h.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_levels -#### Outputs a list of all levels within all hierarchies within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for l in tom.all_levels(): - print(l.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_measures -#### Outputs a list of all measures within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for m in tom.all_measures(): - print(m.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_partitions -#### Outputs a list of all partitions within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for p in tom.all_partitions(): - print(p.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_rls -#### Outputs a list of all row level security objects within all roles of the semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for r in tom.all_rls(): - print(r.Name) -``` -### Parameters -None -### Returns -> - ---- -## cardinality -#### Obtains the cardinality of a column within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.cardinality(column = tom.model.Tables['Product'].Columns['Color']) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## clear_annotations -#### Removes all annotations on a given object within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.clear_annotations(object = tom.model.Tables['Product'].Columns['Color']) -``` -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.clear_annotations(object = tom.model.Tables['Product']) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## clear_extended_properties -#### Removes all extended properties on a given object within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.clear_extended_properties(object = tom.model.Tables['Product'].Columns['Color']) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## data_size -#### Obtains the data size of a column within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.data_size(column = tom.model.Tables['Product'].Columns['Color']) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## depends_on -#### Shows the objects on which a given object depends in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.depends_on( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## dictionary_size -#### Obtains the dictionary size of a column within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.dictionary_size(column = tom.model.Tables['Product'].Columns['Color']) -``` -### Parameters -> **column** -> ->> Required; The TOM column object.
-> -### Returns -> - ---- -## fully_qualified_measures -#### Shows all fully-qualified measures referenced by a given measure's DAX expression. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.fully_qualified_measures( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## get_annotation_value -#### Obtains the annotation value for a given object's annotation in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_annotation_value( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyAnnotation' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the annotation. -> -### Returns -> - ---- -## get_annotations -#### Obtains all of the annotations for a given object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_annotations( - object = tom.model.Tables['Product'].Columns['Color'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## get_extended_properties -#### Obtains all of the extended properties for a given object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_extended_properties( - object = tom.model.Tables['Product'].Columns['Color'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## get_extended_property_value -#### Obtains the extended property value for an object's extended property. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_extended_property_value( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyExtendedProperty' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. -> -### Returns -> - ---- -## in_perspective -#### Identifies whether an object is in a given perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.in_perspective( - object = tom.model.Tables['Product'].Columns['Color'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object.
-> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## is_direct_lake -#### Identifies whether a semantic model is in Direct Lake mode. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - print(tom.is_direct_lake()) -``` -### Parameters -None -### Returns -> True/False - ---- -## is_field_parameter -#### Identifies whether a table is a field parameter. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - print(tom.is_field_parameter( - table_name = 'Parameter' - )) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -### Returns -> True/False - ---- -## records_per_segment -#### Obtains the records per segment of a partition within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.records_per_segment( - object = tom.model.Tables['Sales'].Partitions['Sales - 2024'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## referenced_by -#### Shows the objects referenced by a given object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.referenced_by( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## remove_annotation -#### Removes the annotation from an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_annotation( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyAnnotation' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the annotation. -> -### Returns -> - ---- -## remove_extended_property -#### Removes the extended property from an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_extended_property( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyExtendedProperty' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. 
-> -### Returns -> - ---- -## remove_from_perspective -#### Removes an object (table, column, measure or hierarchy) from a perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_from_perspective( - object = tom.model.Tables['Product'].Columns['Color'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## remove_object -#### Removes an object from a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_object( - object = tom.model.Tables['Product'].Columns['Color'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## remove_translation -#### Removes a translation for an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_translation( - object = tom.model.Tables['Product'].Columns['Color'], - language = 'it-IT' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **language** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support). -> -### Returns -> - ---- -## remove_vertipaq_annotations -#### Removes the annotations set using the [set_vertipaq_annotations] function. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_vertipaq_annotations() -``` -### Parameters -None -### Returns -> - ---- -## row_count -#### Obtains the row count of a table or partition within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.row_count( - object = tom.model.Tables['Product'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## set_annotation -#### Sets an annotation on an object within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_annotation( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyAnnotation', - value = '1' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The annotation name. -> -> **value** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The annotation value. 
-> -### Returns -> - ---- -## set_direct_lake_behavior -#### Sets the [DirectLakeBehavior](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.directlakebehavior?view=analysisservices-dotnet) property for a Direct Lake semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_direct_lake_behavior( - direct_lake_behavior = 'DirectLakeOnly' - ) -``` -### Parameters -> **direct_lake_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [DirectLakeBehavior](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.directlakebehavior?view=analysisservices-dotnet) value. -> -### Returns -> - ---- -## set_extended_property -#### Sets an extended property on an object within the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_extended_property( - object = tom.model.Tables['Product'].Columns['Color'], - extended_property_type = 'Json', - name = 'MyExtendedProperty', - value = '{...}' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **extended_property_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The type of extended property to set. Options: ['Json', 'String']. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. -> -> **value** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The value of the extended property. -> -### Returns -> - ---- -## set_is_available_in_mdx -#### Sets the [IsAvailableInMDX](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.isavailableinmdx?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-isavailableinmdx) property on a column in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_is_available_in_mdx( - table_name = 'Sales', - column_name = 'SalesAmount', - value = False - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column resides. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **value** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Required; The value to set for the [IsAvailableInMDX](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.isavailableinmdx?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-isavailableinmdx) property. -> -### Returns -> - ---- -## set_ols -#### Sets object level security for a given role/column within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_ols( - role_name = 'Reader', - table_name = 'Geography', - column_name = 'Country', - permission = 'None' - ) -``` -### Parameters -> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the role. -> -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table.
-> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **permission** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The permission for a given column. Options: ['Read', 'None', 'Default']. -> -### Returns -> - ---- -## set_rls -#### Sets the row level security expression for a given role/table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_rls( - role_name = 'Reader', - table_name = 'UserGeography', - filter_expression = "'UserGeography'[UserEmail] = USERPRINCIPALNAME()" - ) -``` -### Parameters -> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the role. -> -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table on which to place row level security. -> -> **filter_expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression containing the row level security logic. -> -### Returns -> - ---- -## set_summarize_by -#### Sets the [Summarize By](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby) property on a column in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_summarize_by( - table_name = 'Geography', - column_name = 'Country', - value = 'None' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column resides. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **value** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [summarize by](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby) property of the column. -> -### Returns -> - ---- -## set_translation -#### Sets the translation value for an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_translation( - object = tom.model.Tables['Geography'], - language = 'it-IT', - property = 'Name', - value = 'Geografia' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **language** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support) in which to translate the object property. -> -> **property** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The property to translate. One of the following values: ['Name', 'Description', 'Display Folder']. -> -> **value** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The translation value.
-> -### Returns -> - ---- -## set_vertipaq_annotations -#### Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_vertipaq_annotations() -``` -### Parameters -None -### Returns -> - ---- -## total_size -#### Obtains the total size (in bytes) of a table or column within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.total_size( - object = tom.model.Tables['Sales'].Columns['SalesAmount'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> The total size (in bytes) of the object. - ---- -## unqualified_columns -#### Shows all unqualified columns referenced by a given measure's DAX expression. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.unqualified_columns( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_calc_item -#### Identifies the calculation items which reference a given object. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.used_in_calc_item( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_hierarchies -#### Identifies the hierarchies which reference a given column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_hierarchies( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_in_levels -#### Identifies the levels which reference a given column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_levels( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_in_relationships -#### Identifies the relationships which use a given table/column.
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_relationships( - object = tom.model.Tables['Geography'].Columns['GeographyID'] - ) -``` -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_relationships( - object = tom.model.Tables['Geography'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## used_in_rls -#### Identifies the filter expressions which reference a given object. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.used_in_rls( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_sort_by -#### Identifies the column used for sorting a given column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_sort_by( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_size -#### Obtains the used size of a hierarchy or relationship within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_size( - object = tom.model.Tables['Geography'].Hierarchies['Geo Hierarchy'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - - - - --- ## Direct Lake migration diff --git a/docs/source/conf.py b/docs/source/conf.py index db597126..c55d412f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -41,6 +41,6 @@ html_static_path = ['_static'] # List of packages we don't want to install in the environment -autodoc_mock_imports = ['delta', 'synapse', 'jwt', 'semantic-link-sempy', 'pyspark'] +autodoc_mock_imports = ['delta', 'synapse', 'jwt', 'semantic-link-sempy', 'pyspark', 'powerbiclient'] napoleon_numpy_docstring = True \ No newline at end of file From d51b350f5ae0b121528880f9a5cd7d4e904d2041 Mon Sep 17 00:00:00 2001 From: Michael Kovalsky Date: Mon, 10 Jun 2024 20:50:11 +0300 Subject: [PATCH 16/23] organized functions into folders, updated file names, fixed init files, fixed url references within functions.
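A minimal sketch of the reorganized import paths, based only on the module names that appear in the updated notebooks in this commit (`sempy_labs`, `sempy_labs.migration`, `sempy_labs.report`, `sempy_labs.directlake`, `sempy_labs.lakehouse`, `sempy_labs._tom`); the calls mirror cells from those notebooks and are illustrative rather than a definitive API reference:

```python
# Sketch of the new module layout after this reorganization (assumed from the notebook diffs below).
import sempy_labs as labs
from sempy_labs import migration, report, directlake
from sempy_labs import lakehouse as lake
from sempy_labs._tom import connect_semantic_model

# Feature-specific functions now live in submodules; the '' placeholders follow the notebook convention.
directlake.check_fallback_reason(dataset = '', workspace = None)   # previously labs.check_fallback_reason
lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)  # previously labs.get_lakehouse_tables
```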
--- notebooks/Migration to Direct Lake.ipynb | 2 +- notebooks/Model Optimization.ipynb | 2 +- notebooks/Query Scale Out.ipynb | 2 +- notebooks/Tabular Object Model.ipynb | 2 +- sempy_labs/GetSemanticModelBim.py | 88 --------- sempy_labs/MeasureDependencyTree.py | 87 --------- sempy_labs/__init__.py | 171 ++++++++++++++++-- sempy_labs/{AI.py => _ai.py} | 4 +- sempy_labs/_clear_cache.py | 2 +- .../{Connections.py => _connections.py} | 0 sempy_labs/_create_blank_semantic_model.py | 58 ------ sempy_labs/{LogAnalytics.py => _dax.py} | 5 +- sempy_labs/_generate_semantic_model.py | 144 ++++++++++++++- sempy_labs/_helper_functions.py | 38 ++++ sempy_labs/_list_functions.py | 3 +- ...ModelAutoBuild.py => _model_auto_build.py} | 6 +- sempy_labs/{ModelBPA.py => _model_bpa.py} | 8 +- ...Dependencies.py => _model_dependencies.py} | 83 ++++++++- ...ntegration.py => _one_lake_integration.py} | 0 sempy_labs/{QSO.py => _query_scale_out.py} | 2 +- ...ticModel.py => _refresh_semantic_model.py} | 6 +- sempy_labs/{TOM.py => _tom.py} | 19 +- .../{Translations.py => _translations.py} | 39 +--- sempy_labs/{Vertipaq.py => _vertipaq.py} | 8 +- sempy_labs/directlake/__init__.py | 53 ++++-- .../directlake/_directlake_schema_compare.py | 3 +- .../directlake/_directlake_schema_sync.py | 2 +- sempy_labs/{ => directlake}/_fallback.py | 0 .../directlake/_get_shared_expression.py | 1 + .../_guardrails.py} | 0 .../_show_unsupported_directlake_objects.py | 3 +- ...e_directlake_model_lakehouse_connection.py | 4 +- .../_update_directlake_partition_entity.py | 3 +- .../_warm_cache.py} | 6 +- sempy_labs/lakehouse/__init__.py | 25 ++- .../lakehouse/_get_lakehouse_columns.py | 1 + sempy_labs/lakehouse/_get_lakehouse_tables.py | 2 +- sempy_labs/lakehouse/_lakehouse.py | 5 +- .../{shortcuts.py => lakehouse/_shortcuts.py} | 8 +- sempy_labs/migration/__init__.py | 27 ++- .../{ => migration}/_create_pqt_file.py | 7 +- .../_migrate_calctables_to_lakehouse.py | 6 +- .../_migrate_calctables_to_semantic_model.py | 7 +- ...migrate_model_objects_to_semantic_model.py | 6 +- ...igrate_tables_columns_to_semantic_model.py | 10 +- sempy_labs/migration/_migration_validation.py | 4 +- .../_refresh_calc_tables.py} | 2 +- sempy_labs/report/__init__.py | 42 +++-- sempy_labs/report/_report_functions.py | 11 +- sempy_labs/report/_report_rebind.py | 2 +- tests/test_shortcuts.py | 2 +- 51 files changed, 619 insertions(+), 402 deletions(-) delete mode 100644 sempy_labs/GetSemanticModelBim.py delete mode 100644 sempy_labs/MeasureDependencyTree.py rename sempy_labs/{AI.py => _ai.py} (99%) rename sempy_labs/{Connections.py => _connections.py} (100%) delete mode 100644 sempy_labs/_create_blank_semantic_model.py rename sempy_labs/{LogAnalytics.py => _dax.py} (92%) rename sempy_labs/{ModelAutoBuild.py => _model_auto_build.py} (96%) rename sempy_labs/{ModelBPA.py => _model_bpa.py} (99%) rename sempy_labs/{GetMeasureDependencies.py => _model_dependencies.py} (77%) rename sempy_labs/{OneLakeIntegration.py => _one_lake_integration.py} (100%) rename sempy_labs/{QSO.py => _query_scale_out.py} (99%) rename sempy_labs/{RefreshSemanticModel.py => _refresh_semantic_model.py} (97%) rename sempy_labs/{TOM.py => _tom.py} (98%) rename sempy_labs/{Translations.py => _translations.py} (94%) rename sempy_labs/{Vertipaq.py => _vertipaq.py} (99%) rename sempy_labs/{ => directlake}/_fallback.py (100%) rename sempy_labs/{Guardrails.py => directlake/_guardrails.py} (100%) rename sempy_labs/{WarmCache.py => directlake/_warm_cache.py} (97%) rename sempy_labs/{shortcuts.py => 
lakehouse/_shortcuts.py} (96%) rename sempy_labs/{ => migration}/_create_pqt_file.py (96%) rename sempy_labs/{RefreshCalcTables.py => migration/_refresh_calc_tables.py} (99%) diff --git a/notebooks/Migration to Direct Lake.ipynb b/notebooks/Migration to Direct Lake.ipynb index 0eff22da..7a32e723 100644 --- a/notebooks/Migration to Direct Lake.ipynb +++ b/notebooks/Migration to Direct Lake.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"969a29bf","metadata":{},"source":["### Import the library and set initial parameters"]},{"cell_type":"code","execution_count":null,"id":"29c923f8","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","\n","dataset_name = '' #Enter the import/DQ semantic model name\n","workspace_name = None #Enter the workspace of the import/DQ semantic model. It set to none it will use the current workspace.\n","new_dataset_name = '' #Enter the new Direct Lake semantic model name\n","new_dataset_workspace_name = None #Enter the workspace where the Direct Lake model will be created. If set to None it will use the current workspace.\n","lakehouse_name = None #Enter the lakehouse to be used for the Direct Lake model. If set to None it will use the lakehouse attached to the notebook.\n","lakehouse_workspace_name = None #Enter the lakehouse workspace. If set to None it will use the new_dataset_workspace_name."]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Create the [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file\n","\n","This encapsulates all of the semantic model's Power Query logic into a single file."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.create_pqt_file(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"bf945d07-544c-4934-b7a6-cfdb90ca725e","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Import the Power Query Template to Dataflows Gen2\n","\n","- Open the [OneLake file explorer](https://www.microsoft.com/download/details.aspx?id=105222) and sync your files (right click -> Sync from OneLake)\n","\n","- Navigate to your lakehouse. From this window, create a new Dataflows Gen2 and import the Power Query Template file from OneLake (OneLake -> Workspace -> Lakehouse -> Files...), and publish the Dataflows Gen2.\n","\n","
\n","Important!: Make sure to create the Dataflows Gen2 from within the lakehouse window. That will ensure that all the tables automatically map to that lakehouse as the destination. Otherwise, you will have to manually map each table to its destination individually.\n","
"]},{"cell_type":"markdown","id":"9975db7d","metadata":{},"source":["### Create the Direct Lake model based on the import/DQ semantic model\n","\n","Calculated columns are not migrated to the Direct Lake model as they are not supported in Direct Lake mode."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import time\n","labs.create_blank_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","\n","time.sleep(2)\n","\n","labs.migrate_calc_tables_to_lakehouse(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_tables_columns_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_calc_tables_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_model_objects_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","labs.migrate_field_parameters(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","time.sleep(2)\n","labs.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","labs.refresh_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","labs.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"bb98bb13","metadata":{},"source":["### Show migrated/unmigrated objects"]},{"cell_type":"code","execution_count":null,"id":"5db2f22c","metadata":{},"outputs":[],"source":["labs.migration_validation(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name, \n"," workspace = workspace_name, \n"," new_dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"fa244e9d-87c2-4a66-a7e0-be539a0ac7de","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Rebind all reports using the old semantic model to the new Direct Lake semantic model"]},{"cell_type":"code","execution_count":null,"id":"d4e867cc","metadata":{},"outputs":[],"source":["labs.report_rebind_all(\n"," dataset = dataset_name,\n"," dataset_workspace = workspace_name,\n"," new_dataset = new_dataset_name,\n"," new_dataset_workpace = new_dataset_workspace_name,\n"," report_workspace = workspace_name)"]},{"cell_type":"markdown","id":"3365d20d","metadata":{},"source":["### Rebind reports one-by-one (optional)"]},{"cell_type":"code","execution_count":null,"id":"056b7180-d7ac-492c-87e7-ac7d0e4bb929","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["report_name = '' # Enter report name which you want to rebind to 
the new Direct Lake model\n","\n","labs.report_rebind(\n"," report = report_name,\n"," dataset = new_dataset_name,\n"," report_workspace=workspace_name,\n"," dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"526f2327","metadata":{},"source":["### Show unsupported objects"]},{"cell_type":"code","execution_count":null,"id":"a47376d7","metadata":{},"outputs":[],"source":["dfT, dfC, dfR = labs.show_unsupported_direct_lake_objects(dataset = dataset_name, workspace = workspace_name)\n","\n","print('Calculated Tables are not supported...')\n","display(dfT)\n","print(\"Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations\")\n","print('Calculated columns are not supported. Columns of binary data type are not supported.')\n","display(dfC)\n","print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.')\n","display(dfR)"]},{"cell_type":"markdown","id":"ed08ba4c","metadata":{},"source":["### Schema check between semantic model tables/columns and lakehouse tables/columns\n","\n","This will list any tables/columns which are in the new semantic model but do not exist in the lakehouse"]},{"cell_type":"code","execution_count":null,"id":"03889ba4","metadata":{},"outputs":[],"source":["labs.direct_lake_schema_compare(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"2229963b","metadata":{},"source":["### Show calculated tables which have been migrated to the Direct Lake semantic model as regular tables"]},{"cell_type":"code","execution_count":null,"id":"dd537d90","metadata":{},"outputs":[],"source":["labs.list_direct_lake_model_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.12.3"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"969a29bf","metadata":{},"source":["### Import the library and set initial parameters"]},{"cell_type":"code","execution_count":null,"id":"29c923f8","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs import migration, report, directlake\n","\n","dataset_name = '' #Enter the import/DQ semantic model name\n","workspace_name = None #Enter the workspace of the import/DQ semantic model. 
It set to none it will use the current workspace.\n","new_dataset_name = '' #Enter the new Direct Lake semantic model name\n","new_dataset_workspace_name = None #Enter the workspace where the Direct Lake model will be created. If set to None it will use the current workspace.\n","lakehouse_name = None #Enter the lakehouse to be used for the Direct Lake model. If set to None it will use the lakehouse attached to the notebook.\n","lakehouse_workspace_name = None #Enter the lakehouse workspace. If set to None it will use the new_dataset_workspace_name."]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Create the [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file\n","\n","This encapsulates all of the semantic model's Power Query logic into a single file."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["migration.create_pqt_file(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"bf945d07-544c-4934-b7a6-cfdb90ca725e","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Import the Power Query Template to Dataflows Gen2\n","\n","- Open the [OneLake file explorer](https://www.microsoft.com/download/details.aspx?id=105222) and sync your files (right click -> Sync from OneLake)\n","\n","- Navigate to your lakehouse. From this window, create a new Dataflows Gen2 and import the Power Query Template file from OneLake (OneLake -> Workspace -> Lakehouse -> Files...), and publish the Dataflows Gen2.\n","\n","
\n","Important!: Make sure to create the Dataflows Gen2 from within the lakehouse window. That will ensure that all the tables automatically map to that lakehouse as the destination. Otherwise, you will have to manually map each table to its destination individually.\n","
"]},{"cell_type":"markdown","id":"9975db7d","metadata":{},"source":["### Create the Direct Lake model based on the import/DQ semantic model\n","\n","Calculated columns are not migrated to the Direct Lake model as they are not supported in Direct Lake mode."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import time\n","labs.create_blank_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","\n","time.sleep(2)\n","\n","migration.migrate_calc_tables_to_lakehouse(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_tables_columns_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_calc_tables_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_model_objects_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","migration.migrate_field_parameters(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","time.sleep(2)\n","migration.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","migration.refresh_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","migration.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"bb98bb13","metadata":{},"source":["### Show migrated/unmigrated objects"]},{"cell_type":"code","execution_count":null,"id":"5db2f22c","metadata":{},"outputs":[],"source":["migration.migration_validation(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name, \n"," workspace = workspace_name, \n"," new_dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"fa244e9d-87c2-4a66-a7e0-be539a0ac7de","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Rebind all reports using the old semantic model to the new Direct Lake semantic model"]},{"cell_type":"code","execution_count":null,"id":"d4e867cc","metadata":{},"outputs":[],"source":["report.report_rebind_all(\n"," dataset = dataset_name,\n"," dataset_workspace = workspace_name,\n"," new_dataset = new_dataset_name,\n"," new_dataset_workpace = new_dataset_workspace_name,\n"," report_workspace = workspace_name)"]},{"cell_type":"markdown","id":"3365d20d","metadata":{},"source":["### Rebind reports one-by-one (optional)"]},{"cell_type":"code","execution_count":null,"id":"056b7180-d7ac-492c-87e7-ac7d0e4bb929","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["report_name = '' 
# Enter report name which you want to rebind to the new Direct Lake model\n","\n","report.report_rebind(\n"," report = report_name,\n"," dataset = new_dataset_name,\n"," report_workspace=workspace_name,\n"," dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"526f2327","metadata":{},"source":["### Show unsupported objects"]},{"cell_type":"code","execution_count":null,"id":"a47376d7","metadata":{},"outputs":[],"source":["dfT, dfC, dfR = directlake.show_unsupported_direct_lake_objects(dataset = dataset_name, workspace = workspace_name)\n","\n","print('Calculated Tables are not supported...')\n","display(dfT)\n","print(\"Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations\")\n","print('Calculated columns are not supported. Columns of binary data type are not supported.')\n","display(dfC)\n","print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.')\n","display(dfR)"]},{"cell_type":"markdown","id":"ed08ba4c","metadata":{},"source":["### Schema check between semantic model tables/columns and lakehouse tables/columns\n","\n","This will list any tables/columns which are in the new semantic model but do not exist in the lakehouse"]},{"cell_type":"code","execution_count":null,"id":"03889ba4","metadata":{},"outputs":[],"source":["directlake.direct_lake_schema_compare(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"2229963b","metadata":{},"source":["### Show calculated tables which have been migrated to the Direct Lake semantic model as regular tables"]},{"cell_type":"code","execution_count":null,"id":"dd537d90","metadata":{},"outputs":[],"source":["directlake.list_direct_lake_model_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.12.3"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Model Optimization.ipynb b/notebooks/Model Optimization.ipynb index 43df2f97..0a5a448f 100644 --- a/notebooks/Model Optimization.ipynb +++ b/notebooks/Model Optimization.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import 
connect_semantic_model"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your lakehouse"]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analzyer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path = '', file_name = '')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analzyer\n","\n","This runs the [standard rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the rules and exports the results to a table in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None, export = True)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["labs.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["labs.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False, export = True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["labs.check_fallback_reason(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["labs.optimize_lakehouse_tables(tables = ['', ''], lakehouse = None, workspace = 
None)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["labs.warm_direct_lake_cache_isresident(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["labs.warm_direct_lake_cache_perspective(dataset = '', workspace = None, perspective = '', add_dependencies = True)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs._tom import connect_semantic_model\n","from sempy_labs import lakehouse as lake\n","from sempy_labs import directlake"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your lakehouse"]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the 
Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analzyer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path = '', file_name = '')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analzyer\n","\n","This runs the [standard rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the rules and exports the results to a table in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None, export = True)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False, export = True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["directlake.check_fallback_reason(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["lake.optimize_lakehouse_tables(tables = ['', ''], lakehouse = None, workspace = None)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_isresident(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular 
Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_perspective(dataset = '', workspace = None, perspective = '', add_dependencies = True)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Query Scale Out.ipynb b/notebooks/Query Scale Out.ipynb index 131a967b..a29de474 100644 --- a/notebooks/Query Scale Out.ipynb +++ b/notebooks/Query Scale Out.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"b195eae8","metadata":{},"source":["### Import the library and set the initial parameters"]},{"cell_type":"code","execution_count":null,"id":"1344e286","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","dataset = '' # Enter your dataset name\n","workspace = None # Enter your workspace name (if set to None it will use the workspace in which the notebook is running)"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### View [Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out) (QSO) settings"]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.list_qso_settings(dataset = dataset, workspace = workspace )"]},{"cell_type":"markdown","id":"b0717cbb","metadata":{},"source":["### [Configure Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out-configure)\n","Setting 'auto_sync' to True will ensure that the semantic model automatically syncs read-only replicas. Setting this to False will necessitate syncing the replicas (i.e. 
via the qso_sync function).\n","\n","The 'max_read_only_replicas' is the maximum number of read-only replicas for the semantic model (0-64, -1 for automatic number of replicas).\n"]},{"cell_type":"code","execution_count":null,"id":"ec37dd14","metadata":{},"outputs":[],"source":["labs.set_qso(dataset = dataset, auto_sync = False, max_read_only_replicas = -1, workspace = workspace)"]},{"cell_type":"markdown","id":"5d6beadd","metadata":{},"source":["### Sync Query Scale Out replicas"]},{"cell_type":"code","execution_count":null,"id":"7ca10963","metadata":{},"outputs":[],"source":["labs.qso_sync(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"719f428f","metadata":{},"source":["### Check Query Scale Out Sync Status"]},{"cell_type":"code","execution_count":null,"id":"db6f197c","metadata":{},"outputs":[],"source":["dfA, dfB = labs.qso_sync_status(dataset = dataset, workspace = workspace)\n","display(dfA)\n","display(dfB)"]},{"cell_type":"markdown","id":"e92cdf34","metadata":{},"source":["### Disable Query Scale Out"]},{"cell_type":"code","execution_count":null,"id":"0624d649","metadata":{},"outputs":[],"source":["labs.disable_qso(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"786d89bc","metadata":{},"source":["### Enable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"d521b228","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Large', workspace = workspace)"]},{"cell_type":"markdown","id":"e90c20e9","metadata":{},"source":["### Disable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"433220b2","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Small', workspace = workspace)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"b195eae8","metadata":{},"source":["### Import the library and set the initial parameters"]},{"cell_type":"code","execution_count":null,"id":"1344e286","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","dataset = '' # Enter your dataset name\n","workspace = None # Enter your workspace name (if set to None it will use the workspace in which the notebook is running)"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### 
View [Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out) (QSO) settings"]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.list_qso_settings(dataset = dataset, workspace = workspace )"]},{"cell_type":"markdown","id":"b0717cbb","metadata":{},"source":["### [Configure Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out-configure)\n","Setting 'auto_sync' to True will ensure that the semantic model automatically syncs read-only replicas. Setting this to False will necessitate syncing the replicas (i.e. via the qso_sync function).\n","\n","The 'max_read_only_replicas' is the maximum number of read-only replicas for the semantic model (0-64, -1 for automatic number of replicas).\n"]},{"cell_type":"code","execution_count":null,"id":"ec37dd14","metadata":{},"outputs":[],"source":["labs.set_qso(dataset = dataset, auto_sync = False, max_read_only_replicas = -1, workspace = workspace)"]},{"cell_type":"markdown","id":"5d6beadd","metadata":{},"source":["### Sync Query Scale Out replicas"]},{"cell_type":"code","execution_count":null,"id":"7ca10963","metadata":{},"outputs":[],"source":["labs.qso_sync(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"719f428f","metadata":{},"source":["### Check Query Scale Out Sync Status"]},{"cell_type":"code","execution_count":null,"id":"db6f197c","metadata":{},"outputs":[],"source":["dfA, dfB = labs.qso_sync_status(dataset = dataset, workspace = workspace)\n","display(dfA)\n","display(dfB)"]},{"cell_type":"markdown","id":"e92cdf34","metadata":{},"source":["### Disable Query Scale Out"]},{"cell_type":"code","execution_count":null,"id":"0624d649","metadata":{},"outputs":[],"source":["labs.disable_qso(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"786d89bc","metadata":{},"source":["### Enable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"d521b228","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Large', workspace = workspace)"]},{"cell_type":"markdown","id":"e90c20e9","metadata":{},"source":["### Disable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"433220b2","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Small', workspace = workspace)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Tabular Object Model.ipynb b/notebooks/Tabular Object Model.ipynb index 595bf886..fe6df427 100644 --- a/notebooks/Tabular Object Model.ipynb +++ b/notebooks/Tabular Object Model.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release 
notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. Check the [documentation](https://github.com/m-kovalsky/fabric_cat_tools) to see all available parameters for each function."]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name = t.Name, measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name = t.Name, measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name ='Product', column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," tom.add_data_column(table_name = 'Segment', column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n","\n"," 
tom.add_calculated_column(table_name = 'Internet Sales', column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name = t.Name, column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name = 'Geography', hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name = t.Name, hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table = 'Internet Sales', from_column = 'ProductKey',\n"," to_table = 'Product', to_column = 'ProductKey', \n"," from_cardinality = 'Many', to_cardinality = 'One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = 'let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_entity_partition(table_name = table_name, entity_name = table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as 
tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name = table_name, expression = \"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name = table_name, column_name = 'Color', source_column = \"'Product[Color]\", data_type = 'String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name = 'Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(role_name ='Reader', table_name = 'Product', filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name = r.Name, table_name = 'Product', filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name = 'Reader', table_name = 'Product', column_name = 'Size', permission = 'None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name = r.Name, table_name = t.Name, column_name = 'Size', permission = 'None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name = 'MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 
'MyCalcGroup':\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language = 'it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language = 'it-IT', property = 'Name', value = 'Produtto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name = 'Parameter', objects = \"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove an object(s) from a semantic model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object = t.Columns['Size'])\n"," tom.remove_object(object = t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object = tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object = tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," 
print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(t.Name + ' : ' + str(rc))\n"," for c in t.Columns:\n"," col_size = tom.total_size(column = c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(full_name + ' : ' + h.Name)"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(full_name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(t.Name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} 
+{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs._tom import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. 
Check the [documentation](https://github.com/m-kovalsky/fabric_cat_tools) to see all available parameters for each function."]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name = t.Name, measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name = t.Name, measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name ='Product', column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," tom.add_data_column(table_name = 'Segment', column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n","\n"," tom.add_calculated_column(table_name = 'Internet Sales', column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name = t.Name, column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name = 'Geography', hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name = t.Name, hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 
'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table = 'Internet Sales', from_column = 'ProductKey',\n"," to_table = 'Product', to_column = 'ProductKey', \n"," from_cardinality = 'Many', to_cardinality = 'One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = 'let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_entity_partition(table_name = table_name, entity_name = table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name = table_name, expression = \"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name = table_name, column_name = 'Color', source_column = \"'Product[Color]\", data_type = 'String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name = 'Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) to the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(role_name ='Reader', table_name = 'Product', filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name = r.Name, table_name = 'Product', filter_expression = \"'Dim Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) to the semantic model\n","This adds row level security (or updates it if it already 
exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name = 'Reader', table_name = 'Product', column_name = 'Size', permission = 'None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name = r.Name, table_name = t.Name, column_name = 'Size', permission = 'None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name = 'MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'MyCalcGroup':\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language = 'it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language = 'it-IT', property = 'Name', value = 'Produtto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name = 'Parameter', objects = \"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove an object(s) from a semantic 
model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object = t.Columns['Size'])\n"," tom.remove_object(object = t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object = tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object = tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(t.Name + ' : ' + str(rc))\n"," for c in t.Columns:\n"," col_size = tom.total_size(column = c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, 
c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(full_name + ' : ' + h.Name)"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(full_name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(t.Name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/sempy_labs/GetSemanticModelBim.py b/sempy_labs/GetSemanticModelBim.py deleted file mode 100644 index f60526e5..00000000 --- a/sempy_labs/GetSemanticModelBim.py +++ /dev/null @@ -1,88 +0,0 @@ -import sempy.fabric as fabric -import pandas as pd -import json, os, time, base64 -from sempy_labs._helper_functions import resolve_lakehouse_name -from sempy_labs.lakehouse import lakehouse_attached -from typing import List, Optional, Union - - -def get_semantic_model_bim( - dataset: str, - workspace: Optional[str] = None, - save_to_file_name: Optional[str] = None, -): - """ - Extracts the Model.bim file for a given semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - save_to_file_name : str, default=None - If specified, saves the Model.bim as a file in the lakehouse attached to the notebook. - - Returns - ------- - str - The Model.bim file for the semantic model. 
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objType = "SemanticModel" - client = fabric.FabricRestClient() - itemList = fabric.list_items(workspace=workspace, type=objType) - itemListFilt = itemList[(itemList["Display Name"] == dataset)] - itemId = itemListFilt["Id"].iloc[0] - response = client.post( - f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" - ) - - if response.status_code == 200: - res = response.json() - elif response.status_code == 202: - operationId = response.headers["x-ms-operation-id"] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body["status"] != "Succeeded": - time.sleep(3) - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - res = response.json() - df_items = pd.json_normalize(res["definition"]["parts"]) - df_items_filt = df_items[df_items["path"] == "model.bim"] - payload = df_items_filt["payload"].iloc[0] - bimFile = base64.b64decode(payload).decode("utf-8") - bimJson = json.loads(bimFile) - - if save_to_file_name is not None: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print( - f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." - ) - return - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - folderPath = "/lakehouse/default/Files" - fileExt = ".bim" - if not save_to_file_name.endswith(fileExt): - save_to_file_name = save_to_file_name + fileExt - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: - json.dump(bimJson, json_file, indent=4) - print( - f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" - ) - - return bimJson diff --git a/sempy_labs/MeasureDependencyTree.py b/sempy_labs/MeasureDependencyTree.py deleted file mode 100644 index 32000041..00000000 --- a/sempy_labs/MeasureDependencyTree.py +++ /dev/null @@ -1,87 +0,0 @@ -import sempy -import sempy.fabric as fabric -from anytree import Node, RenderTree -from .GetMeasureDependencies import get_measure_dependencies -from typing import List, Optional, Union -from sempy._utils._log import log - - -@log -def measure_dependency_tree( - dataset: str, measure_name: str, workspace: Optional[str] = None -): - """ - Prints a measure dependency tree of all dependent objects for a measure in a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - measure_name : str - Name of the measure. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dfM = fabric.list_measures(dataset=dataset, workspace=workspace) - dfM_filt = dfM[dfM["Measure Name"] == measure_name] - - if len(dfM_filt) == 0: - print( - f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace." 
- ) - return - - md = get_measure_dependencies(dataset, workspace) - df_filt = md[md["Object Name"] == measure_name] - - # Create a dictionary to hold references to nodes - node_dict = {} - measureIcon = "\u2211" - tableIcon = "\u229E" - columnIcon = "\u229F" - - # Populate the tree - for _, row in df_filt.iterrows(): - # measure_name = row['Object Name'] - ref_obj_table_name = row["Referenced Table"] - ref_obj_name = row["Referenced Object"] - ref_obj_type = row["Referenced Object Type"] - parent_node_name = row["Parent Node"] - - # Create or get the parent node - parent_node = node_dict.get(parent_node_name) - if parent_node is None: - parent_node = Node(parent_node_name) - node_dict[parent_node_name] = parent_node - parent_node.custom_property = measureIcon + " " - - # Create the child node - child_node_name = ref_obj_name - child_node = Node(child_node_name, parent=parent_node) - if ref_obj_type == "Column": - child_node.custom_property = columnIcon + " '" + ref_obj_table_name + "'" - elif ref_obj_type == "Table": - child_node.custom_property = tableIcon + " " - elif ref_obj_type == "Measure": - child_node.custom_property = measureIcon + " " - - # Update the dictionary with the child node - node_dict[child_node_name] = child_node - - # Visualize the tree structure using RenderTree - for pre, _, node in RenderTree(node_dict[measure_name]): - if tableIcon in node.custom_property: - print(f"{pre}{node.custom_property}'{node.name}'") - else: - print(f"{pre}{node.custom_property}[{node.name}]") diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py index 29d98378..713efdc6 100644 --- a/sempy_labs/__init__.py +++ b/sempy_labs/__init__.py @@ -1,27 +1,158 @@ -from sempy_labs._clear_cache import clear_cache as clear_cache -from sempy_labs._create_blank_semantic_model import ( - create_blank_semantic_model as create_blank_semantic_model, +from sempy_labs._clear_cache import clear_cache +#from sempy_labs._connections import ( + #create_connection_cloud, + #create_connection_vnet, + #create_connection_on_prem +#) +from sempy_labs._dax import ( + run_dax ) -from sempy_labs._create_pqt_file import create_pqt_file as create_pqt_file -from sempy_labs._fallback import check_fallback_reason as check_fallback_reason from sempy_labs._generate_semantic_model import ( - create_semantic_model_from_bim as create_semantic_model_from_bim, - deploy_semantic_model as deploy_semantic_model, + create_blank_semantic_model, + create_semantic_model_from_bim, + #deploy_semantic_model, + get_semantic_model_bim ) from sempy_labs._list_functions import ( - get_object_level_security as get_object_level_security, + get_object_level_security, + #list_annotations, + #list_columns, + list_dashboards, + list_dataflow_storage_accounts, + #list_datamarts, + #list_datapipelines, + #list_eventstreams, + #list_kpis, + #list_kqldatabases, + #list_kqlquerysets, + list_lakehouses, + #list_mirroredwarehouses, + #list_mlexperiments, + #list_mlmodels, + #list_relationships, + #list_sqlendpoints, + #list_tables, + list_warehouses, + #list_workspace_role_assignments, + create_warehouse, + update_item, ) + from sempy_labs._helper_functions import ( - resolve_lakehouse_name as resolve_lakehouse_name, - save_as_delta_table as save_as_delta_table, - generate_embedded_filter as generate_embedded_filter, - get_direct_lake_sql_endpoint as get_direct_lake_sql_endpoint, - resolve_lakehouse_id as resolve_lakehouse_id, - resolve_dataset_name as resolve_dataset_name, - resolve_dataset_id as resolve_dataset_id, - resolve_report_name as 
resolve_report_name, - resolve_report_id as resolve_report_id, - create_relationship_name as create_relationship_name, - format_dax_object_name as format_dax_object_name, - create_abfss_path as create_abfss_path, + create_abfss_path, + format_dax_object_name, + create_relationship_name, + save_as_delta_table, + generate_embedded_filter, + get_direct_lake_sql_endpoint, + resolve_lakehouse_id, + resolve_lakehouse_name, + resolve_dataset_id, + resolve_dataset_name, + resolve_report_id, + resolve_report_name, + #language_validate ) +from sempy_labs._model_auto_build import ( + model_auto_build +) +from sempy_labs._model_bpa import ( + model_bpa_rules, + run_model_bpa +) +from sempy_labs._model_dependencies import ( + measure_dependency_tree, + get_measure_dependencies, + get_model_calc_dependencies +) +from sempy_labs._one_lake_integration import ( + export_model_to_onelake, +) +#from sempy_labs._query_scale_out import ( +# qso_sync, +# qso_sync_status, +# set_qso, +# list_qso_settings, +# disable_qso, +# set_semantic_model_storage_format, +# set_workspace_default_storage_format, +#) +from sempy_labs._refresh_semantic_model import ( + refresh_semantic_model, + cancel_dataset_refresh +) +from sempy_labs._translations import ( + translate_semantic_model +) +from sempy_labs._vertipaq import ( + vertipaq_analyzer, + #visualize_vertipaq, + import_vertipaq_analyzer +) + +__all__ = [ + 'clear_cache', + #create_connection_cloud, + #create_connection_vnet, + #create_connection_on_prem, + 'run_dax', + 'create_blank_semantic_model', + 'create_semantic_model_from_bim', + #'deploy_semantic_model', + 'get_semantic_model_bim', + 'get_object_level_security', + #'list_annotations', + #'list_columns', + 'list_dashboards', + 'list_dataflow_storage_accounts', + #'list_datamarts', + #'list_datapipelines', + #'list_eventstreams', + #'list_kpis', + #'list_kqldatabases', + #'list_kqlquerysets', + 'list_lakehouses', + #'list_mirroredwarehouses', + #'list_mlexperiments', + #'list_mlmodels', + #'list_relationships', + #'list_sqlendpoints', + #'list_tables', + 'list_warehouses', + #'list_workspace_role_assignments', + 'create_warehouse', + 'update_item', + 'create_abfss_path', + 'format_dax_object_name', + 'create_relationship_name', + 'save_as_delta_table', + 'generate_embedded_filter', + 'get_direct_lake_sql_endpoint', + 'resolve_lakehouse_id', + 'resolve_lakehouse_name', + 'resolve_dataset_id', + 'resolve_dataset_name', + 'resolve_report_id', + 'resolve_report_name', + #'language_validate', + 'model_auto_build', + 'model_bpa_rules', + 'run_model_bpa', + 'measure_dependency_tree', + 'get_measure_dependencies', + 'get_model_calc_dependencies', + 'export_model_to_onelake', + #'qso_sync', + #'qso_sync_status', + #'set_qso', + #'list_qso_settings', + #'disable_qso', + #'set_semantic_model_storage_format', + #'set_workspace_default_storage_format', + 'refresh_semantic_model', + 'cancel_dataset_refresh', + 'translate_semantic_model', + 'vertipaq_analyzer', + #'visualize_vertipaq', + 'import_vertipaq_analyzer' +] \ No newline at end of file diff --git a/sempy_labs/AI.py b/sempy_labs/_ai.py similarity index 99% rename from sempy_labs/AI.py rename to sempy_labs/_ai.py index c6ecd9fd..0f253156 100644 --- a/sempy_labs/AI.py +++ b/sempy_labs/_ai.py @@ -9,8 +9,8 @@ def optimize_semantic_model(dataset: str, workspace: Optional[str] = None): - from .ModelBPA import run_model_bpa - from ._fallback import check_fallback_reason + from ._model_bpa import run_model_bpa + from .directlake._fallback import check_fallback_reason from 
._helper_functions import format_dax_object_name modelBPA = run_model_bpa( diff --git a/sempy_labs/_clear_cache.py b/sempy_labs/_clear_cache.py index 426f339b..cab8d8db 100644 --- a/sempy_labs/_clear_cache.py +++ b/sempy_labs/_clear_cache.py @@ -1,6 +1,6 @@ import sempy import sempy.fabric as fabric -from sempy_labs._helper_functions import resolve_dataset_id +from ._helper_functions import resolve_dataset_id from typing import List, Optional, Union import sempy_labs._icons as icons diff --git a/sempy_labs/Connections.py b/sempy_labs/_connections.py similarity index 100% rename from sempy_labs/Connections.py rename to sempy_labs/_connections.py diff --git a/sempy_labs/_create_blank_semantic_model.py b/sempy_labs/_create_blank_semantic_model.py deleted file mode 100644 index af2b0bb1..00000000 --- a/sempy_labs/_create_blank_semantic_model.py +++ /dev/null @@ -1,58 +0,0 @@ -import sempy -import sempy.fabric as fabric -from typing import List, Optional, Union -import sempy_labs._icons as icons - - -def create_blank_semantic_model( - dataset: str, - compatibility_level: Optional[int] = 1605, - workspace: Optional[str] = None, -): - """ - Creates a new blank semantic model (no tables/columns etc.). - - Parameters - ---------- - dataset : str - Name of the semantic model. - compatibility_level : int - The compatibility level of the semantic model. - Defaults to 1605. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if compatibility_level < 1500: - print(f"{icons.red_dot} Compatiblity level must be at least 1500.") - return - - tmsl = f""" - {{ - "createOrReplace": {{ - "object": {{ - "database": '{dataset}' - }}, - "database": {{ - "name": '{dataset}', - "compatibilityLevel": {compatibility_level}, - "model": {{ - "culture": "en-US", - "defaultPowerBIDataSourceVersion": "powerBI_V3" - }} - }} - }} - }} - """ - - fabric.execute_tmsl(script=tmsl, workspace=workspace) - - return print( - f"{icons.green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace." - ) diff --git a/sempy_labs/LogAnalytics.py b/sempy_labs/_dax.py similarity index 92% rename from sempy_labs/LogAnalytics.py rename to sempy_labs/_dax.py index 14d7197d..25976be9 100644 --- a/sempy_labs/LogAnalytics.py +++ b/sempy_labs/_dax.py @@ -1,7 +1,7 @@ import sempy import sempy.fabric as fabric import pandas as pd -from ._helper_functions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union from sempy._utils._log import log @@ -22,8 +22,9 @@ def run_dax( Name of the semantic model. dax_query : str The DAX query. - user_name : str | None + user_name : str The user name (i.e. hello@goodbye.com). + Defaults to None which resolves to no user impersonation. workspace : str, default=None The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse diff --git a/sempy_labs/_generate_semantic_model.py b/sempy_labs/_generate_semantic_model.py index fd11a822..04bfa464 100644 --- a/sempy_labs/_generate_semantic_model.py +++ b/sempy_labs/_generate_semantic_model.py @@ -1,8 +1,65 @@ import sempy import sempy.fabric as fabric -import json, base64, time -from .GetSemanticModelBim import get_semantic_model_bim -from typing import Optional +import pandas as pd +import json, base64, time, os +from sempy_labs._generate_semantic_model import get_semantic_model_bim +from typing import List, Optional, Union +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +import sempy_labs._icons as icons + +def create_blank_semantic_model( + dataset: str, + compatibility_level: Optional[int] = 1605, + workspace: Optional[str] = None, +): + """ + Creates a new blank semantic model (no tables/columns etc.). + + Parameters + ---------- + dataset : str + Name of the semantic model. + compatibility_level : int + The compatibility level of the semantic model. + Defaults to 1605. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if compatibility_level < 1500: + print(f"{icons.red_dot} Compatiblity level must be at least 1500.") + return + + tmsl = f""" + {{ + "createOrReplace": {{ + "object": {{ + "database": '{dataset}' + }}, + "database": {{ + "name": '{dataset}', + "compatibilityLevel": {compatibility_level}, + "model": {{ + "culture": "en-US", + "defaultPowerBIDataSourceVersion": "powerBI_V3" + }} + }} + }} + }} + """ + + fabric.execute_tmsl(script=tmsl, workspace=workspace) + + return print( + f"{icons.green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace." + ) def create_semantic_model_from_bim( @@ -144,3 +201,84 @@ def deploy_semantic_model( create_semantic_model_from_bim( dataset=new_dataset, bim_file=bim, workspace=new_dataset_workspace ) + +def get_semantic_model_bim( + dataset: str, + workspace: Optional[str] = None, + save_to_file_name: Optional[str] = None, +): + """ + Extracts the Model.bim file for a given semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + save_to_file_name : str, default=None + If specified, saves the Model.bim as a file in the lakehouse attached to the notebook. + + Returns + ------- + str + The Model.bim file for the semantic model. 
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + objType = "SemanticModel" + client = fabric.FabricRestClient() + itemList = fabric.list_items(workspace=workspace, type=objType) + itemListFilt = itemList[(itemList["Display Name"] == dataset)] + itemId = itemListFilt["Id"].iloc[0] + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" + ) + + if response.status_code == 200: + res = response.json() + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + res = response.json() + df_items = pd.json_normalize(res["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "model.bim"] + payload = df_items_filt["payload"].iloc[0] + bimFile = base64.b64decode(payload).decode("utf-8") + bimJson = json.loads(bimFile) + + if save_to_file_name is not None: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + folderPath = "/lakehouse/default/Files" + fileExt = ".bim" + if not save_to_file_name.endswith(fileExt): + save_to_file_name = save_to_file_name + fileExt + filePath = os.path.join(folderPath, save_to_file_name) + with open(filePath, "w") as json_file: + json.dump(bimJson, json_file, indent=4) + print( + f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + ) + + return bimJson diff --git a/sempy_labs/_helper_functions.py b/sempy_labs/_helper_functions.py index 46a36f18..cda7f488 100644 --- a/sempy_labs/_helper_functions.py +++ b/sempy_labs/_helper_functions.py @@ -1,5 +1,7 @@ +import sempy import sempy.fabric as fabric import re +import pandas as pd from pyspark.sql import SparkSession from typing import Optional from uuid import UUID @@ -480,3 +482,39 @@ def save_as_delta_table( print( f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace." ) + +def language_validate(language: str): + """ + Validateds that the language specified exists within the supported langauges. + + Parameters + ---------- + language : str + The language code. + + Returns + ------- + bool + A True/False indication as to whether the language code is supported. + """ + + url = "https://learn.microsoft.com/azure/ai-services/translator/language-support" + + tables = pd.read_html(url) + df = tables[0] + + df_filt = df[df["Language code"] == language] + + df_filt2 = df[df["Language"] == language.capitalize()] + + if len(df_filt) == 1: + lang = df_filt["Language"].iloc[0] + elif len(df_filt2) == 1: + lang = df_filt2["Language"].iloc[0] + else: + print( + f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}." 
+ ) + return + + return lang \ No newline at end of file diff --git a/sempy_labs/_list_functions.py b/sempy_labs/_list_functions.py index f2fe7c2f..13ed07c4 100644 --- a/sempy_labs/_list_functions.py +++ b/sempy_labs/_list_functions.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric import pandas as pd import json, time @@ -1341,7 +1342,7 @@ def list_kpis(dataset: str, workspace: Optional[str] = None): A pandas dataframe showing the KPIs for the semantic model. """ - from .TOM import connect_semantic_model + from ._tom import connect_semantic_model with connect_semantic_model( dataset=dataset, workspace=workspace, readonly=True diff --git a/sempy_labs/ModelAutoBuild.py b/sempy_labs/_model_auto_build.py similarity index 96% rename from sempy_labs/ModelAutoBuild.py rename to sempy_labs/_model_auto_build.py index befa151a..0fa7070e 100644 --- a/sempy_labs/ModelAutoBuild.py +++ b/sempy_labs/_model_auto_build.py @@ -1,9 +1,9 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .TOM import connect_semantic_model -from ._create_blank_semantic_model import create_blank_semantic_model -from .directlake.GetSharedExpression import get_shared_expression +from sempy_labs._tom import connect_semantic_model +from sempy_labs._generate_semantic_model import create_blank_semantic_model +from sempy_labs.directlake._get_shared_expression import get_shared_expression from typing import List, Optional, Union from sempy._utils._log import log diff --git a/sempy_labs/ModelBPA.py b/sempy_labs/_model_bpa.py similarity index 99% rename from sempy_labs/ModelBPA.py rename to sempy_labs/_model_bpa.py index 9059c07f..dfd3a61e 100644 --- a/sempy_labs/ModelBPA.py +++ b/sempy_labs/_model_bpa.py @@ -5,10 +5,10 @@ import numpy as np from IPython.display import display, HTML from pyspark.sql import SparkSession -from .GetMeasureDependencies import get_measure_dependencies -from ._helper_functions import format_dax_object_name, resolve_lakehouse_name -from .lakehouse.Lakehouse import lakehouse_attached -from .lakehouse.GetLakehouseTables import get_lakehouse_tables +from sempy_labs._model_dependencies import get_measure_dependencies +from sempy_labs._helper_functions import format_dax_object_name, resolve_lakehouse_name +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached from typing import List, Optional, Union from sempy._utils._log import log diff --git a/sempy_labs/GetMeasureDependencies.py b/sempy_labs/_model_dependencies.py similarity index 77% rename from sempy_labs/GetMeasureDependencies.py rename to sempy_labs/_model_dependencies.py index ecb2a28a..f5db757d 100644 --- a/sempy_labs/GetMeasureDependencies.py +++ b/sempy_labs/_model_dependencies.py @@ -1,8 +1,10 @@ import sempy import sempy.fabric as fabric import pandas as pd -from ._helper_functions import format_dax_object_name +from sempy_labs._helper_functions import format_dax_object_name from typing import List, Optional, Union +from anytree import Node, RenderTree +from sempy._utils._log import log def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): @@ -256,3 +258,82 @@ def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): df = df.drop(["Done"], axis=1) return df +@log +def measure_dependency_tree( + dataset: str, measure_name: str, workspace: Optional[str] = None +): + """ + Prints a measure dependency tree of all dependent objects for a measure in a semantic model. 
+ + Parameters + ---------- + dataset : str + Name of the semantic model. + measure_name : str + Name of the measure. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfM_filt = dfM[dfM["Measure Name"] == measure_name] + + if len(dfM_filt) == 0: + print( + f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace." + ) + return + + md = get_measure_dependencies(dataset, workspace) + df_filt = md[md["Object Name"] == measure_name] + + # Create a dictionary to hold references to nodes + node_dict = {} + measureIcon = "\u2211" + tableIcon = "\u229E" + columnIcon = "\u229F" + + # Populate the tree + for _, row in df_filt.iterrows(): + # measure_name = row['Object Name'] + ref_obj_table_name = row["Referenced Table"] + ref_obj_name = row["Referenced Object"] + ref_obj_type = row["Referenced Object Type"] + parent_node_name = row["Parent Node"] + + # Create or get the parent node + parent_node = node_dict.get(parent_node_name) + if parent_node is None: + parent_node = Node(parent_node_name) + node_dict[parent_node_name] = parent_node + parent_node.custom_property = measureIcon + " " + + # Create the child node + child_node_name = ref_obj_name + child_node = Node(child_node_name, parent=parent_node) + if ref_obj_type == "Column": + child_node.custom_property = columnIcon + " '" + ref_obj_table_name + "'" + elif ref_obj_type == "Table": + child_node.custom_property = tableIcon + " " + elif ref_obj_type == "Measure": + child_node.custom_property = measureIcon + " " + + # Update the dictionary with the child node + node_dict[child_node_name] = child_node + + # Visualize the tree structure using RenderTree + for pre, _, node in RenderTree(node_dict[measure_name]): + if tableIcon in node.custom_property: + print(f"{pre}{node.custom_property}'{node.name}'") + else: + print(f"{pre}{node.custom_property}[{node.name}]") diff --git a/sempy_labs/OneLakeIntegration.py b/sempy_labs/_one_lake_integration.py similarity index 100% rename from sempy_labs/OneLakeIntegration.py rename to sempy_labs/_one_lake_integration.py diff --git a/sempy_labs/QSO.py b/sempy_labs/_query_scale_out.py similarity index 99% rename from sempy_labs/QSO.py rename to sempy_labs/_query_scale_out.py index 10e74e98..d38a7b2c 100644 --- a/sempy_labs/QSO.py +++ b/sempy_labs/_query_scale_out.py @@ -1,7 +1,7 @@ import sempy import sempy.fabric as fabric import pandas as pd -from ._helper_functions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union import sempy_labs._icons as icons diff --git a/sempy_labs/RefreshSemanticModel.py b/sempy_labs/_refresh_semantic_model.py similarity index 97% rename from sempy_labs/RefreshSemanticModel.py rename to sempy_labs/_refresh_semantic_model.py index 747919fa..2eece555 100644 --- a/sempy_labs/RefreshSemanticModel.py +++ b/sempy_labs/_refresh_semantic_model.py @@ -1,7 +1,7 @@ import sempy import sempy.fabric as fabric import time -from ._helper_functions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union from 
sempy._utils._log import log import sempy_labs._icons as icons @@ -144,8 +144,8 @@ def cancel_dataset_refresh( dataset: str, request_id: Optional[str] = None, workspace: Optional[str] = None ): """ - Cancels the refresh of a semantic model which was executed via the [Enhanced Refresh API](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh). - + Cancels the refresh of a semantic model which was executed via the `Enhanced Refresh API `_ + Parameters ---------- dataset : str diff --git a/sempy_labs/TOM.py b/sempy_labs/_tom.py similarity index 98% rename from sempy_labs/TOM.py rename to sempy_labs/_tom.py index 0237a81c..44592251 100644 --- a/sempy_labs/TOM.py +++ b/sempy_labs/_tom.py @@ -3,10 +3,10 @@ import pandas as pd import re from datetime import datetime -from ._helper_functions import format_dax_object_name -from ._list_functions import list_relationships -from .RefreshSemanticModel import refresh_semantic_model -from ._fallback import check_fallback_reason +from sempy_labs._helper_functions import format_dax_object_name +from sempy_labs._list_functions import list_relationships +from sempy_labs._refresh_semantic_model import refresh_semantic_model +from sempy_labs.directlake._fallback import check_fallback_reason from contextlib import contextmanager from typing import List, Optional, Union, TYPE_CHECKING from sempy._utils._log import log @@ -647,6 +647,7 @@ def set_ols( Name of the column. permission : str The object level security permission for the column. + `Valid values `_ Returns ------- @@ -1065,6 +1066,7 @@ def set_alternate_of( Name of the column. summarization_type : str The summarization type for the column. + `Valid values `_ base_table : str Name of the base table for aggregation. base_column : str @@ -1269,7 +1271,8 @@ def set_extended_property( object : TOM Object An object (i.e. table/column/measure) within a semantic model. extended_property_type : str - The extended property type. Options: 'Json', 'String'. + The extended property type. + `Valid values `_ name : str Name of the extended property. value : str @@ -2515,6 +2518,7 @@ def set_summarize_by( value : bool, default=None The SummarizeBy property value. Defaults to none which resolves to 'Default'. + `Valid values `_ Returns ------- @@ -2559,6 +2563,7 @@ def set_direct_lake_behavior(self, direct_lake_behavior: str): ---------- direct_lake_behavior : str The DirectLakeBehavior property value. + `Valid values `_ Returns ------- @@ -3767,7 +3772,8 @@ def set_encoding_hint(self, table_name: str, column_name: str, value: str): column_name : str Name of the column. value : str - Encoding hint value. Options: 'Value', 'Hash', 'Default'. + Encoding hint value. + `Valid values `_ Returns ------- @@ -3799,6 +3805,7 @@ def set_data_type(self, table_name: str, column_name: str, value: str): Name of the column. value : str The data type. + `Valid values `_ Returns ------- diff --git a/sempy_labs/Translations.py b/sempy_labs/_translations.py similarity index 94% rename from sempy_labs/Translations.py rename to sempy_labs/_translations.py index 9dc4ca3b..5db33bc4 100644 --- a/sempy_labs/Translations.py +++ b/sempy_labs/_translations.py @@ -4,43 +4,6 @@ import sempy_labs._icons as icons -def language_validate(language: str): - """ - Validateds that the language specified exists within the supported langauges. - - Parameters - ---------- - language : str - The language code. - - Returns - ------- - bool - A True/False indication as to whether the language code is supported. 
- """ - - url = "https://learn.microsoft.com/azure/ai-services/translator/language-support" - - tables = pd.read_html(url) - df = tables[0] - - df_filt = df[df["Language code"] == language] - - df_filt2 = df[df["Language"] == language.capitalize()] - - if len(df_filt) == 1: - lang = df_filt["Language"].iloc[0] - elif len(df_filt2) == 1: - lang = df_filt2["Language"].iloc[0] - else: - print( - f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}." - ) - return - - return lang - - @log def translate_semantic_model( dataset: str, @@ -72,7 +35,7 @@ def translate_semantic_model( from synapse.ml.services import Translate from pyspark.sql.functions import col, flatten from pyspark.sql import SparkSession - from .TOM import connect_semantic_model + from ._tom import connect_semantic_model if isinstance(languages, str): languages = [languages] diff --git a/sempy_labs/Vertipaq.py b/sempy_labs/_vertipaq.py similarity index 99% rename from sempy_labs/Vertipaq.py rename to sempy_labs/_vertipaq.py index f2a132ff..c73dded0 100644 --- a/sempy_labs/Vertipaq.py +++ b/sempy_labs/_vertipaq.py @@ -4,14 +4,14 @@ from IPython.display import display, HTML import zipfile, os, shutil, datetime, warnings from pyspark.sql import SparkSession -from ._helper_functions import ( +from sempy_labs._helper_functions import ( format_dax_object_name, get_direct_lake_sql_endpoint, resolve_lakehouse_name, ) -from ._list_functions import list_relationships -from .lakehouse.GetLakehouseTables import get_lakehouse_tables -from .lakehouse.Lakehouse import lakehouse_attached +from sempy_labs._list_functions import list_relationships +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached from typing import List, Optional, Union from sempy._utils._log import log diff --git a/sempy_labs/directlake/__init__.py b/sempy_labs/directlake/__init__.py index de59562e..4fd46c4f 100644 --- a/sempy_labs/directlake/__init__.py +++ b/sempy_labs/directlake/__init__.py @@ -1,24 +1,53 @@ from sempy_labs.directlake._directlake_schema_compare import ( - direct_lake_schema_compare as direct_lake_schema_compare, + direct_lake_schema_compare ) from sempy_labs.directlake._directlake_schema_sync import ( - direct_lake_schema_sync as direct_lake_schema_sync, + direct_lake_schema_sync +) +from sempy_labs.directlake._fallback import ( + check_fallback_reason, ) from sempy_labs.directlake._get_directlake_lakehouse import ( - get_directlake_lakehouse as get_directlake_lakehouse, + get_direct_lake_lakehouse +) +from sempy_labs.directlake._get_shared_expression import ( + get_shared_expression +) +from sempy_labs.directlake._guardrails import ( + get_direct_lake_guardrails, + get_sku_size, + get_directlake_guardrails_for_sku ) -from sempy_labs.directlake._get_directlake_model_calc_tables import ( - list_directlake_model_calc_tables as list_directlake_model_calc_tables, +from sempy_labs.directlake._list_directlake_model_calc_tables import ( + list_direct_lake_model_calc_tables ) from sempy_labs.directlake._show_unsupported_directlake_objects import ( - show_unsupported_direct_lake_objects as show_unsupported_direct_lake_objects, + show_unsupported_direct_lake_objects ) from sempy_labs.directlake._update_directlake_model_lakehouse_connection import ( - update_direct_lake_model_lakehouse_connection as update_direct_lake_model_lakehouse_connection, + update_direct_lake_model_lakehouse_connection ) from 
sempy_labs.directlake._update_directlake_partition_entity import ( - update_direct_lake_partition_entity as update_direct_lake_partition_entity, -) -from sempy_labs.directlake._get_shared_expression import ( - get_shared_expression as get_shared_expression, -) + update_direct_lake_partition_entity +) +from sempy_labs.directlake._warm_cache import ( + warm_direct_lake_cache_isresident, + warm_direct_lake_cache_perspective +) + +__all__ = [ + direct_lake_schema_compare, + direct_lake_schema_sync, + check_fallback_reason, + get_direct_lake_lakehouse, + get_shared_expression, + get_direct_lake_guardrails, + get_sku_size, + get_directlake_guardrails_for_sku, + list_direct_lake_model_calc_tables, + show_unsupported_direct_lake_objects, + update_direct_lake_model_lakehouse_connection, + update_direct_lake_partition_entity, + warm_direct_lake_cache_isresident, + warm_direct_lake_cache_perspective +] \ No newline at end of file diff --git a/sempy_labs/directlake/_directlake_schema_compare.py b/sempy_labs/directlake/_directlake_schema_compare.py index f5b83aa1..908a5c41 100644 --- a/sempy_labs/directlake/_directlake_schema_compare.py +++ b/sempy_labs/directlake/_directlake_schema_compare.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric import pandas as pd from sempy_labs._helper_functions import ( @@ -5,7 +6,7 @@ resolve_lakehouse_name, get_direct_lake_sql_endpoint, ) -from sempy_labs.lakehouse import get_lakehouse_columns +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns from sempy_labs._list_functions import list_tables from typing import Optional diff --git a/sempy_labs/directlake/_directlake_schema_sync.py b/sempy_labs/directlake/_directlake_schema_sync.py index 70c7a1a5..e81961a6 100644 --- a/sempy_labs/directlake/_directlake_schema_sync.py +++ b/sempy_labs/directlake/_directlake_schema_sync.py @@ -1,7 +1,7 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .lakehouse.GetLakehouseColumns import get_lakehouse_columns +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns from sempy_labs._helper_functions import ( format_dax_object_name, resolve_lakehouse_name, diff --git a/sempy_labs/_fallback.py b/sempy_labs/directlake/_fallback.py similarity index 100% rename from sempy_labs/_fallback.py rename to sempy_labs/directlake/_fallback.py diff --git a/sempy_labs/directlake/_get_shared_expression.py b/sempy_labs/directlake/_get_shared_expression.py index 6b366273..409ae709 100644 --- a/sempy_labs/directlake/_get_shared_expression.py +++ b/sempy_labs/directlake/_get_shared_expression.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric from sempy_labs._helper_functions import resolve_lakehouse_name from sempy_labs._list_functions import list_lakehouses diff --git a/sempy_labs/Guardrails.py b/sempy_labs/directlake/_guardrails.py similarity index 100% rename from sempy_labs/Guardrails.py rename to sempy_labs/directlake/_guardrails.py diff --git a/sempy_labs/directlake/_show_unsupported_directlake_objects.py b/sempy_labs/directlake/_show_unsupported_directlake_objects.py index 6b335751..5fc88b95 100644 --- a/sempy_labs/directlake/_show_unsupported_directlake_objects.py +++ b/sempy_labs/directlake/_show_unsupported_directlake_objects.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric import pandas as pd from sempy_labs._list_functions import list_tables @@ -9,7 +10,7 @@ def show_unsupported_direct_lake_objects( dataset: str, workspace: Optional[str] = None ) -> Tuple[pd.DataFrame, pd.DataFrame, 
pd.DataFrame]: """ - Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). + Returns a list of a semantic model's objects which are not supported by Direct Lake based on `official documentation `_. Parameters ---------- diff --git a/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py index 0ff42fe6..54a0a1b0 100644 --- a/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +++ b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py @@ -1,8 +1,8 @@ import sempy import sempy.fabric as fabric -from .GetSharedExpression import get_shared_expression +from sempy_labs.directlake._get_shared_expression import get_shared_expression from sempy_labs._helper_functions import resolve_lakehouse_name -from ..TOM import connect_semantic_model +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union diff --git a/sempy_labs/directlake/_update_directlake_partition_entity.py b/sempy_labs/directlake/_update_directlake_partition_entity.py index b1484e93..97a44069 100644 --- a/sempy_labs/directlake/_update_directlake_partition_entity.py +++ b/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -1,5 +1,6 @@ +import sempy import sempy.fabric as fabric -from sempy_labs.TOM import connect_semantic_model +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union diff --git a/sempy_labs/WarmCache.py b/sempy_labs/directlake/_warm_cache.py similarity index 97% rename from sempy_labs/WarmCache.py rename to sempy_labs/directlake/_warm_cache.py index eae67b1b..47aa8609 100644 --- a/sempy_labs/WarmCache.py +++ b/sempy_labs/directlake/_warm_cache.py @@ -4,9 +4,9 @@ from tqdm.auto import tqdm import numpy as np import time -from ._helper_functions import format_dax_object_name -from .RefreshSemanticModel import refresh_semantic_model -from .GetMeasureDependencies import get_measure_dependencies +from sempy_labs._helper_functions import format_dax_object_name +from sempy_labs._refresh_semantic_model import refresh_semantic_model +from sempy_labs._model_dependencies import get_measure_dependencies from typing import List, Optional, Union from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/sempy_labs/lakehouse/__init__.py b/sempy_labs/lakehouse/__init__.py index af2664d9..c3def060 100644 --- a/sempy_labs/lakehouse/__init__.py +++ b/sempy_labs/lakehouse/__init__.py @@ -1,10 +1,27 @@ from sempy_labs.lakehouse._get_lakehouse_columns import ( - get_lakehouse_columns as get_lakehouse_columns, + get_lakehouse_columns ) from sempy_labs.lakehouse._get_lakehouse_tables import ( - get_lakehouse_tables as get_lakehouse_tables, + get_lakehouse_tables ) from sempy_labs.lakehouse._lakehouse import ( - lakehouse_attached as lakehouse_attached, - optimize_lakehouse_tables as optimize_lakehouse_tables, + lakehouse_attached, + optimize_lakehouse_tables ) + +from sempy_labs.lakehouse._shortcuts import ( + list_shortcuts, + #create_shortcut, + create_shortcut_onelake, + delete_shortcut +) +__all__ = [ + get_lakehouse_columns, + get_lakehouse_tables, + lakehouse_attached, + optimize_lakehouse_tables, + list_shortcuts, + #create_shortcut, + create_shortcut_onelake, + delete_shortcut +] \ No newline at end of file diff --git 
a/sempy_labs/lakehouse/_get_lakehouse_columns.py b/sempy_labs/lakehouse/_get_lakehouse_columns.py index ebc27b30..bc547979 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_columns.py +++ b/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric import pandas as pd from pyspark.sql import SparkSession diff --git a/sempy_labs/lakehouse/_get_lakehouse_tables.py b/sempy_labs/lakehouse/_get_lakehouse_tables.py index a38c7d6d..f9b2e7b2 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_tables.py +++ b/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -5,7 +5,7 @@ import pyarrow.parquet as pq import datetime from sempy_labs._helper_functions import resolve_lakehouse_id, resolve_lakehouse_name -from ..Guardrails import get_sku_size, get_directlake_guardrails_for_sku +from sempy_labs.directlake._guardrails import get_sku_size, get_directlake_guardrails_for_sku from sempy_labs.lakehouse._lakehouse import lakehouse_attached from typing import Optional diff --git a/sempy_labs/lakehouse/_lakehouse.py b/sempy_labs/lakehouse/_lakehouse.py index eebc5f3d..265e66d9 100644 --- a/sempy_labs/lakehouse/_lakehouse.py +++ b/sempy_labs/lakehouse/_lakehouse.py @@ -1,3 +1,4 @@ +import sempy import sempy.fabric as fabric from tqdm.auto import tqdm from pyspark.sql import SparkSession @@ -30,7 +31,7 @@ def optimize_lakehouse_tables( workspace: Optional[str] = None, ): """ - Runs the [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) function over the specified lakehouse tables. + Runs the `OPTIMIZE `_ function over the specified lakehouse tables. Parameters ---------- @@ -45,7 +46,7 @@ def optimize_lakehouse_tables( or if no lakehouse attached, resolves to the workspace of the notebook. """ - from .lakehouse.GetLakehouseTables import get_lakehouse_tables + from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables if workspace == None: workspace_id = fabric.get_workspace_id() diff --git a/sempy_labs/shortcuts.py b/sempy_labs/lakehouse/_shortcuts.py similarity index 96% rename from sempy_labs/shortcuts.py rename to sempy_labs/lakehouse/_shortcuts.py index 9be99197..bf0a2d13 100644 --- a/sempy_labs/shortcuts.py +++ b/sempy_labs/lakehouse/_shortcuts.py @@ -1,7 +1,7 @@ -import sempy_labs +import sempy import sempy.fabric as fabric import pandas as pd -from ._helper_functions import resolve_lakehouse_name, resolve_lakehouse_id +from sempy_labs._helper_functions import resolve_lakehouse_name, resolve_lakehouse_id from typing import List, Optional, Union import sempy_labs._icons as icons @@ -15,7 +15,7 @@ def create_shortcut_onelake( shortcut_name: Optional[str] = None, ): """ - Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to a delta table in OneLake. + Creates a `shortcut `_ to a delta table in OneLake. Parameters ---------- @@ -95,7 +95,7 @@ def create_shortcut( workspace: Optional[str] = None, ): """ - Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to an ADLS Gen2 or Amazon S3 source. + Creates a `shortcut `_ to an ADLS Gen2 or Amazon S3 source. 
Parameters ---------- diff --git a/sempy_labs/migration/__init__.py b/sempy_labs/migration/__init__.py index d90bec0b..5b71c9bc 100644 --- a/sempy_labs/migration/__init__.py +++ b/sempy_labs/migration/__init__.py @@ -1,16 +1,31 @@ +from sempy_labs.migration._create_pqt_file import ( + create_pqt_file +) from sempy_labs.migration._migrate_calctables_to_lakehouse import ( - migrate_calctables_to_lakehouse as migrate_calctables_to_lakehouse, - migrate_field_parameters as migrate_field_parameters, + migrate_calc_tables_to_lakehouse, + migrate_field_parameters ) from sempy_labs.migration._migrate_calctables_to_semantic_model import ( - migrate_calc_tables_to_semantic_model as migrate_calc_tables_to_semantic_model, + migrate_calc_tables_to_semantic_model ) from sempy_labs.migration._migrate_model_objects_to_semantic_model import ( - migrate_model_objects_to_semantic_model as migrate_model_objects_to_semantic_model, + migrate_model_objects_to_semantic_model ) from sempy_labs.migration._migrate_tables_columns_to_semantic_model import ( - migrate_tables_columns_to_semantic_model as migrate_tables_columns_to_semantic_model, + migrate_tables_columns_to_semantic_model ) from sempy_labs.migration._migration_validation import ( - migration_validation as migration_validation, + migration_validation, + #list_semantic_model_objects ) + +__all__ = [ + create_pqt_file, + migrate_calc_tables_to_lakehouse, + migrate_field_parameters, + migrate_calc_tables_to_semantic_model, + migrate_model_objects_to_semantic_model, + migrate_tables_columns_to_semantic_model, + migration_validation, + #list_semantic_model_objects +] \ No newline at end of file diff --git a/sempy_labs/_create_pqt_file.py b/sempy_labs/migration/_create_pqt_file.py similarity index 96% rename from sempy_labs/_create_pqt_file.py rename to sempy_labs/migration/_create_pqt_file.py index 63034882..fee5ec97 100644 --- a/sempy_labs/_create_pqt_file.py +++ b/sempy_labs/migration/_create_pqt_file.py @@ -1,8 +1,9 @@ +import sempy import sempy.fabric as fabric import json, os, shutil import xml.etree.ElementTree as ET -from ._list_functions import list_tables -from sempy_labs.lakehouse import lakehouse_attached +from sempy_labs._list_functions import list_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached from sempy._utils._log import log from typing import Optional import sempy_labs._icons as icons @@ -13,7 +14,7 @@ def create_pqt_file( dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None ): """ - Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. + Dynamically generates a `Power Query Template `_ file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. 
Parameters ---------- diff --git a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py index 27a0f49f..e1513ab8 100644 --- a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py +++ b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -2,13 +2,13 @@ import sempy.fabric as fabric import pandas as pd import re, datetime, time -from .lakehouse.GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import ( +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs._helper_functions import ( resolve_lakehouse_name, resolve_lakehouse_id, create_abfss_path, ) -from .TOM import connect_semantic_model +from sempy_labs._tom import connect_semantic_model from pyspark.sql import SparkSession from typing import List, Optional, Union from sempy._utils._log import log diff --git a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py index 470c3942..98db9370 100644 --- a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py +++ b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py @@ -1,8 +1,9 @@ +import sempy import sempy.fabric as fabric import re, datetime, time -from .lakehouse.GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import resolve_lakehouse_name -from .TOM import connect_semantic_model +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._tom import connect_semantic_model from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py b/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py index 72381854..a24cf3c0 100644 --- a/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py +++ b/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py @@ -1,9 +1,9 @@ import sempy import sempy.fabric as fabric import re, datetime, time -from ._list_functions import list_tables -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model +from sempy_labs._list_functions import list_tables +from sempy_labs._helper_functions import create_relationship_name +from sempy_labs._tom import connect_semantic_model from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py index 6461f107..a7400081 100644 --- a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +++ b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py @@ -2,11 +2,11 @@ import sempy.fabric as fabric import pandas as pd import datetime, time -from ._list_functions import list_tables -from .GetSharedExpression import get_shared_expression -from .HelperFunctions import resolve_lakehouse_name -from .lakehouse.Lakehouse import lakehouse_attached -from .TOM import connect_semantic_model +from sempy_labs._list_functions import list_tables +from sempy_labs.directlake._get_shared_expression import get_shared_expression +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union 
from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/sempy_labs/migration/_migration_validation.py b/sempy_labs/migration/_migration_validation.py index 4e0c9c16..2df1b4a5 100644 --- a/sempy_labs/migration/_migration_validation.py +++ b/sempy_labs/migration/_migration_validation.py @@ -1,8 +1,8 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model +from sempy_labs._helper_functions import create_relationship_name +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union from sempy._utils._log import log diff --git a/sempy_labs/RefreshCalcTables.py b/sempy_labs/migration/_refresh_calc_tables.py similarity index 99% rename from sempy_labs/RefreshCalcTables.py rename to sempy_labs/migration/_refresh_calc_tables.py index 3fe8d733..130a6108 100644 --- a/sempy_labs/RefreshCalcTables.py +++ b/sempy_labs/migration/_refresh_calc_tables.py @@ -3,7 +3,7 @@ import pandas as pd import re, datetime, time from pyspark.sql import SparkSession -from .TOM import connect_semantic_model +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/sempy_labs/report/__init__.py b/sempy_labs/report/__init__.py index f908ea90..a3a5d3cf 100644 --- a/sempy_labs/report/__init__.py +++ b/sempy_labs/report/__init__.py @@ -1,15 +1,35 @@ from sempy_labs.report._generate_report import ( - create_report_from_reportjson as create_report_from_reportjson, - update_report_from_reportjson as update_report_from_reportjson, + create_report_from_reportjson, + update_report_from_reportjson ) from sempy_labs.report._report_functions import ( - get_report_json as get_report_json, - report_dependency_tree as report_dependency_tree, - export_report as export_report, - clone_report as clone_report, - launch_report as launch_report, - list_report_pages as list_report_pages, - list_report_visuals as list_report_visuals, - list_report_bookmarks as list_report_bookmarks, - translate_report_titles as translate_report_titles, + get_report_json, + #report_dependency_tree, + export_report, + clone_report, + launch_report, + #list_report_pages, + #list_report_visuals, + #list_report_bookmarks, + #translate_report_titles ) +from sempy_labs.report._report_rebind import ( + report_rebind, + report_rebind_all, +) + +__all__ = [ + create_report_from_reportjson, + update_report_from_reportjson, + get_report_json, + #report_dependency_tree, + export_report, + clone_report, + launch_report, + #list_report_pages, + #list_report_visuals, + #list_report_bookmarks, + #translate_report_titles, + report_rebind, + report_rebind_all +] \ No newline at end of file diff --git a/sempy_labs/report/_report_functions.py b/sempy_labs/report/_report_functions.py index a293c557..5b9093da 100644 --- a/sempy_labs/report/_report_functions.py +++ b/sempy_labs/report/_report_functions.py @@ -7,14 +7,14 @@ from synapse.ml.services import Translate from pyspark.sql.functions import col, flatten from pyspark.sql import SparkSession -from .report._generate_report import update_report_from_reportjson -from .Translations import language_validate -from .lakehouse.Lakehouse import lakehouse_attached -from .HelperFunctions import ( +from sempy_labs.report._generate_report import update_report_from_reportjson +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from 
sempy_labs._helper_functions import ( generate_embedded_filter, resolve_dataset_name, resolve_report_id, resolve_lakehouse_name, + language_validate ) from typing import List, Optional, Union from sempy._utils._log import log @@ -171,7 +171,8 @@ def export_report( report : str Name of the Power BI report. export_format : str - The format in which to export the report. See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'. + The format in which to export the report. For image formats, enter the file extension in this parameter, not 'IMAGE'. + `Valid formats `_ file_name : str, default=None The name of the file to be saved within the lakehouse. Do not include the file extension. Defaults ot the reportName parameter value. bookmark_name : str, default=None diff --git a/sempy_labs/report/_report_rebind.py b/sempy_labs/report/_report_rebind.py index c86fecfc..6d663ab4 100644 --- a/sempy_labs/report/_report_rebind.py +++ b/sempy_labs/report/_report_rebind.py @@ -1,6 +1,6 @@ import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_dataset_id, resolve_report_id +from sempy_labs._helper_functions import resolve_dataset_id, resolve_report_id from typing import List, Optional, Union from sempy._utils._log import log import sempy_labs._icons as icons diff --git a/tests/test_shortcuts.py b/tests/test_shortcuts.py index b56057f7..22b0f872 100644 --- a/tests/test_shortcuts.py +++ b/tests/test_shortcuts.py @@ -1,6 +1,6 @@ import pandas as pd from json import loads -from sempy_labs.shortcuts import create_shortcut_onelake +from sempy_labs.lakehouse._shortcuts import create_shortcut_onelake from unittest.mock import MagicMock, PropertyMock, patch From 5289488d86bc62965b542d7775a01d5809564b9d Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 10 Jun 2024 20:22:14 +0000 Subject: [PATCH 17/23] fix circular dependencies --- sempy_labs/_generate_semantic_model.py | 1 - sempy_labs/lakehouse/_get_lakehouse_columns.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sempy_labs/_generate_semantic_model.py b/sempy_labs/_generate_semantic_model.py index 04bfa464..af8ad846 100644 --- a/sempy_labs/_generate_semantic_model.py +++ b/sempy_labs/_generate_semantic_model.py @@ -2,7 +2,6 @@ import sempy.fabric as fabric import pandas as pd import json, base64, time, os -from sempy_labs._generate_semantic_model import get_semantic_model_bim from typing import List, Optional, Union from sempy_labs._helper_functions import resolve_lakehouse_name from sempy_labs.lakehouse._lakehouse import lakehouse_attached diff --git a/sempy_labs/lakehouse/_get_lakehouse_columns.py b/sempy_labs/lakehouse/_get_lakehouse_columns.py index bc547979..7ed371d4 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_columns.py +++ b/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -8,7 +8,6 @@ resolve_lakehouse_id, ) from typing import Optional -from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables def get_lakehouse_columns( @@ -32,6 +31,7 @@ def get_lakehouse_columns( pandas.DataFrame Shows the tables/columns within a lakehouse and their properties. 
""" + from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables df = pd.DataFrame( columns=[ From 01f708bb96095d4a19c7c360dfb9e5b1c01db5a8 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Mon, 10 Jun 2024 20:42:32 +0000 Subject: [PATCH 18/23] fix init functions --- sempy_labs/__init__.py | 159 +++++++++--------- sempy_labs/_generate_semantic_model.py | 2 + sempy_labs/_helper_functions.py | 3 +- sempy_labs/_model_dependencies.py | 2 + sempy_labs/_refresh_semantic_model.py | 2 +- sempy_labs/directlake/__init__.py | 58 +++---- sempy_labs/lakehouse/__init__.py | 33 ++-- sempy_labs/lakehouse/_get_lakehouse_tables.py | 5 +- sempy_labs/migration/__init__.py | 32 ++-- sempy_labs/report/__init__.py | 40 ++--- sempy_labs/report/_report_functions.py | 2 +- 11 files changed, 163 insertions(+), 175 deletions(-) diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py index 713efdc6..bacd52fe 100644 --- a/sempy_labs/__init__.py +++ b/sempy_labs/__init__.py @@ -1,39 +1,38 @@ from sempy_labs._clear_cache import clear_cache -#from sempy_labs._connections import ( - #create_connection_cloud, - #create_connection_vnet, - #create_connection_on_prem -#) -from sempy_labs._dax import ( - run_dax -) + +# from sempy_labs._connections import ( +# create_connection_cloud, +# create_connection_vnet, +# create_connection_on_prem +# ) +from sempy_labs._dax import run_dax from sempy_labs._generate_semantic_model import ( create_blank_semantic_model, create_semantic_model_from_bim, - #deploy_semantic_model, - get_semantic_model_bim + # deploy_semantic_model, + get_semantic_model_bim, ) from sempy_labs._list_functions import ( get_object_level_security, - #list_annotations, - #list_columns, + # list_annotations, + # list_columns, list_dashboards, list_dataflow_storage_accounts, - #list_datamarts, - #list_datapipelines, - #list_eventstreams, - #list_kpis, - #list_kqldatabases, - #list_kqlquerysets, + # list_datamarts, + # list_datapipelines, + # list_eventstreams, + # list_kpis, + # list_kqldatabases, + # list_kqlquerysets, list_lakehouses, - #list_mirroredwarehouses, - #list_mlexperiments, - #list_mlmodels, - #list_relationships, - #list_sqlendpoints, - #list_tables, + # list_mirroredwarehouses, + # list_mlexperiments, + # list_mlmodels, + # list_relationships, + # list_sqlendpoints, + # list_tables, list_warehouses, - #list_workspace_role_assignments, + # list_workspace_role_assignments, create_warehouse, update_item, ) @@ -50,25 +49,21 @@ resolve_dataset_id, resolve_dataset_name, resolve_report_id, - resolve_report_name, - #language_validate -) -from sempy_labs._model_auto_build import ( - model_auto_build -) -from sempy_labs._model_bpa import ( - model_bpa_rules, - run_model_bpa + resolve_report_name, + # language_validate ) +from sempy_labs._model_auto_build import model_auto_build +from sempy_labs._model_bpa import model_bpa_rules, run_model_bpa from sempy_labs._model_dependencies import ( measure_dependency_tree, get_measure_dependencies, - get_model_calc_dependencies + get_model_calc_dependencies, ) from sempy_labs._one_lake_integration import ( export_model_to_onelake, ) -#from sempy_labs._query_scale_out import ( + +# from sempy_labs._query_scale_out import ( # qso_sync, # qso_sync_status, # set_qso, @@ -76,72 +71,70 @@ # disable_qso, # set_semantic_model_storage_format, # set_workspace_default_storage_format, -#) +# ) from sempy_labs._refresh_semantic_model import ( refresh_semantic_model, - cancel_dataset_refresh -) -from sempy_labs._translations import ( - translate_semantic_model + 
cancel_dataset_refresh, ) +from sempy_labs._translations import translate_semantic_model from sempy_labs._vertipaq import ( vertipaq_analyzer, - #visualize_vertipaq, - import_vertipaq_analyzer + # visualize_vertipaq, + import_vertipaq_analyzer, ) __all__ = [ - 'clear_cache', - #create_connection_cloud, - #create_connection_vnet, - #create_connection_on_prem, - 'run_dax', - 'create_blank_semantic_model', - 'create_semantic_model_from_bim', + "clear_cache", + # create_connection_cloud, + # create_connection_vnet, + # create_connection_on_prem, + "run_dax", + "create_blank_semantic_model", + "create_semantic_model_from_bim", #'deploy_semantic_model', - 'get_semantic_model_bim', - 'get_object_level_security', + "get_semantic_model_bim", + "get_object_level_security", #'list_annotations', #'list_columns', - 'list_dashboards', - 'list_dataflow_storage_accounts', + "list_dashboards", + "list_dataflow_storage_accounts", #'list_datamarts', #'list_datapipelines', #'list_eventstreams', #'list_kpis', #'list_kqldatabases', #'list_kqlquerysets', - 'list_lakehouses', + "list_lakehouses", #'list_mirroredwarehouses', #'list_mlexperiments', #'list_mlmodels', #'list_relationships', #'list_sqlendpoints', #'list_tables', - 'list_warehouses', + "list_warehouses", #'list_workspace_role_assignments', - 'create_warehouse', - 'update_item', - 'create_abfss_path', - 'format_dax_object_name', - 'create_relationship_name', - 'save_as_delta_table', - 'generate_embedded_filter', - 'get_direct_lake_sql_endpoint', - 'resolve_lakehouse_id', - 'resolve_lakehouse_name', - 'resolve_dataset_id', - 'resolve_dataset_name', - 'resolve_report_id', - 'resolve_report_name', + "create_warehouse", + "update_item", + "create_abfss_path", + "format_dax_object_name", + "create_relationship_name", + "save_as_delta_table", + "generate_embedded_filter", + "get_direct_lake_sql_endpoint", + "resolve_lakehouse_id", + "resolve_lakehouse_name", + "resolve_dataset_id", + "resolve_dataset_name", + "resolve_report_id", + "resolve_report_name", #'language_validate', - 'model_auto_build', - 'model_bpa_rules', - 'run_model_bpa', - 'measure_dependency_tree', - 'get_measure_dependencies', - 'get_model_calc_dependencies', - 'export_model_to_onelake', + "model_auto_build", + "model_bpa_rules", + "run_model_bpa", + "measure_dependency_tree", + "get_measure_dependencies", + "get_model_calc_dependencies", + "export_model_to_onelake", #'qso_sync', #'qso_sync_status', #'set_qso', @@ -149,10 +142,10 @@ #'disable_qso', #'set_semantic_model_storage_format', #'set_workspace_default_storage_format', - 'refresh_semantic_model', - 'cancel_dataset_refresh', - 'translate_semantic_model', - 'vertipaq_analyzer', + "refresh_semantic_model", + "cancel_dataset_refresh", + "translate_semantic_model", + "vertipaq_analyzer", #'visualize_vertipaq', - 'import_vertipaq_analyzer' -] \ No newline at end of file + "import_vertipaq_analyzer", +] diff --git a/sempy_labs/_generate_semantic_model.py b/sempy_labs/_generate_semantic_model.py index af8ad846..91974cc5 100644 --- a/sempy_labs/_generate_semantic_model.py +++ b/sempy_labs/_generate_semantic_model.py @@ -7,6 +7,7 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached import sempy_labs._icons as icons + def create_blank_semantic_model( dataset: str, compatibility_level: Optional[int] = 1605, @@ -201,6 +202,7 @@ def deploy_semantic_model( dataset=new_dataset, bim_file=bim, workspace=new_dataset_workspace ) + def get_semantic_model_bim( dataset: str, workspace: Optional[str] = None, diff --git 
a/sempy_labs/_helper_functions.py b/sempy_labs/_helper_functions.py index cda7f488..bde6e333 100644 --- a/sempy_labs/_helper_functions.py +++ b/sempy_labs/_helper_functions.py @@ -483,6 +483,7 @@ def save_as_delta_table( f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace." ) + def language_validate(language: str): """ Validateds that the language specified exists within the supported langauges. @@ -517,4 +518,4 @@ def language_validate(language: str): ) return - return lang \ No newline at end of file + return lang diff --git a/sempy_labs/_model_dependencies.py b/sempy_labs/_model_dependencies.py index f5db757d..d1e56789 100644 --- a/sempy_labs/_model_dependencies.py +++ b/sempy_labs/_model_dependencies.py @@ -258,6 +258,8 @@ def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): df = df.drop(["Done"], axis=1) return df + + @log def measure_dependency_tree( dataset: str, measure_name: str, workspace: Optional[str] = None diff --git a/sempy_labs/_refresh_semantic_model.py b/sempy_labs/_refresh_semantic_model.py index 2eece555..06334698 100644 --- a/sempy_labs/_refresh_semantic_model.py +++ b/sempy_labs/_refresh_semantic_model.py @@ -145,7 +145,7 @@ def cancel_dataset_refresh( ): """ Cancels the refresh of a semantic model which was executed via the `Enhanced Refresh API `_ - + Parameters ---------- dataset : str diff --git a/sempy_labs/directlake/__init__.py b/sempy_labs/directlake/__init__.py index 4fd46c4f..d7d66a65 100644 --- a/sempy_labs/directlake/__init__.py +++ b/sempy_labs/directlake/__init__.py @@ -1,53 +1,45 @@ -from sempy_labs.directlake._directlake_schema_compare import ( - direct_lake_schema_compare -) -from sempy_labs.directlake._directlake_schema_sync import ( - direct_lake_schema_sync -) +from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare +from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync from sempy_labs.directlake._fallback import ( check_fallback_reason, ) -from sempy_labs.directlake._get_directlake_lakehouse import ( - get_direct_lake_lakehouse -) -from sempy_labs.directlake._get_shared_expression import ( - get_shared_expression -) +from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse +from sempy_labs.directlake._get_shared_expression import get_shared_expression from sempy_labs.directlake._guardrails import ( get_direct_lake_guardrails, get_sku_size, - get_directlake_guardrails_for_sku + get_directlake_guardrails_for_sku, ) from sempy_labs.directlake._list_directlake_model_calc_tables import ( - list_direct_lake_model_calc_tables + list_direct_lake_model_calc_tables, ) from sempy_labs.directlake._show_unsupported_directlake_objects import ( - show_unsupported_direct_lake_objects + show_unsupported_direct_lake_objects, ) from sempy_labs.directlake._update_directlake_model_lakehouse_connection import ( - update_direct_lake_model_lakehouse_connection + update_direct_lake_model_lakehouse_connection, ) from sempy_labs.directlake._update_directlake_partition_entity import ( - update_direct_lake_partition_entity + update_direct_lake_partition_entity, ) from sempy_labs.directlake._warm_cache import ( warm_direct_lake_cache_isresident, - warm_direct_lake_cache_perspective + warm_direct_lake_cache_perspective, ) __all__ = [ - direct_lake_schema_compare, - direct_lake_schema_sync, - check_fallback_reason, - get_direct_lake_lakehouse, - get_shared_expression, - 
get_direct_lake_guardrails, - get_sku_size, - get_directlake_guardrails_for_sku, - list_direct_lake_model_calc_tables, - show_unsupported_direct_lake_objects, - update_direct_lake_model_lakehouse_connection, - update_direct_lake_partition_entity, - warm_direct_lake_cache_isresident, - warm_direct_lake_cache_perspective -] \ No newline at end of file + "direct_lake_schema_compare", + "direct_lake_schema_sync", + "check_fallback_reason", + "get_direct_lake_lakehouse", + "get_shared_expression", + "get_direct_lake_guardrails", + "get_sku_size", + "get_directlake_guardrails_for_sku", + "list_direct_lake_model_calc_tables", + "show_unsupported_direct_lake_objects", + "update_direct_lake_model_lakehouse_connection", + "update_direct_lake_partition_entity", + "warm_direct_lake_cache_isresident", + "warm_direct_lake_cache_perspective", +] diff --git a/sempy_labs/lakehouse/__init__.py b/sempy_labs/lakehouse/__init__.py index c3def060..9db8e01a 100644 --- a/sempy_labs/lakehouse/__init__.py +++ b/sempy_labs/lakehouse/__init__.py @@ -1,27 +1,24 @@ -from sempy_labs.lakehouse._get_lakehouse_columns import ( - get_lakehouse_columns -) -from sempy_labs.lakehouse._get_lakehouse_tables import ( - get_lakehouse_tables -) +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables from sempy_labs.lakehouse._lakehouse import ( lakehouse_attached, - optimize_lakehouse_tables + optimize_lakehouse_tables, ) from sempy_labs.lakehouse._shortcuts import ( list_shortcuts, - #create_shortcut, + # create_shortcut, create_shortcut_onelake, - delete_shortcut + delete_shortcut, ) + __all__ = [ - get_lakehouse_columns, - get_lakehouse_tables, - lakehouse_attached, - optimize_lakehouse_tables, - list_shortcuts, - #create_shortcut, - create_shortcut_onelake, - delete_shortcut -] \ No newline at end of file + "get_lakehouse_columns", + "get_lakehouse_tables", + "lakehouse_attached", + "optimize_lakehouse_tables", + "list_shortcuts", + # create_shortcut, + "create_shortcut_onelake", + "delete_shortcut", +] diff --git a/sempy_labs/lakehouse/_get_lakehouse_tables.py b/sempy_labs/lakehouse/_get_lakehouse_tables.py index f9b2e7b2..e907017d 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_tables.py +++ b/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -5,7 +5,10 @@ import pyarrow.parquet as pq import datetime from sempy_labs._helper_functions import resolve_lakehouse_id, resolve_lakehouse_name -from sempy_labs.directlake._guardrails import get_sku_size, get_directlake_guardrails_for_sku +from sempy_labs.directlake._guardrails import ( + get_sku_size, + get_directlake_guardrails_for_sku, +) from sempy_labs.lakehouse._lakehouse import lakehouse_attached from typing import Optional diff --git a/sempy_labs/migration/__init__.py b/sempy_labs/migration/__init__.py index 5b71c9bc..60f78892 100644 --- a/sempy_labs/migration/__init__.py +++ b/sempy_labs/migration/__init__.py @@ -1,31 +1,29 @@ -from sempy_labs.migration._create_pqt_file import ( - create_pqt_file -) +from sempy_labs.migration._create_pqt_file import create_pqt_file from sempy_labs.migration._migrate_calctables_to_lakehouse import ( migrate_calc_tables_to_lakehouse, - migrate_field_parameters + migrate_field_parameters, ) from sempy_labs.migration._migrate_calctables_to_semantic_model import ( - migrate_calc_tables_to_semantic_model + migrate_calc_tables_to_semantic_model, ) from sempy_labs.migration._migrate_model_objects_to_semantic_model import ( - 
migrate_model_objects_to_semantic_model + migrate_model_objects_to_semantic_model, ) from sempy_labs.migration._migrate_tables_columns_to_semantic_model import ( - migrate_tables_columns_to_semantic_model + migrate_tables_columns_to_semantic_model, ) from sempy_labs.migration._migration_validation import ( migration_validation, - #list_semantic_model_objects + # list_semantic_model_objects ) __all__ = [ - create_pqt_file, - migrate_calc_tables_to_lakehouse, - migrate_field_parameters, - migrate_calc_tables_to_semantic_model, - migrate_model_objects_to_semantic_model, - migrate_tables_columns_to_semantic_model, - migration_validation, - #list_semantic_model_objects -] \ No newline at end of file + "create_pqt_file", + "migrate_calc_tables_to_lakehouse", + "migrate_field_parameters", + "migrate_calc_tables_to_semantic_model", + "migrate_model_objects_to_semantic_model", + "migrate_tables_columns_to_semantic_model", + "migration_validation", + # list_semantic_model_objects +] diff --git a/sempy_labs/report/__init__.py b/sempy_labs/report/__init__.py index a3a5d3cf..51e905f8 100644 --- a/sempy_labs/report/__init__.py +++ b/sempy_labs/report/__init__.py @@ -1,17 +1,17 @@ from sempy_labs.report._generate_report import ( create_report_from_reportjson, - update_report_from_reportjson + update_report_from_reportjson, ) from sempy_labs.report._report_functions import ( get_report_json, - #report_dependency_tree, + # report_dependency_tree, export_report, clone_report, launch_report, - #list_report_pages, - #list_report_visuals, - #list_report_bookmarks, - #translate_report_titles + # list_report_pages, + # list_report_visuals, + # list_report_bookmarks, + # translate_report_titles ) from sempy_labs.report._report_rebind import ( report_rebind, @@ -19,17 +19,17 @@ ) __all__ = [ - create_report_from_reportjson, - update_report_from_reportjson, - get_report_json, - #report_dependency_tree, - export_report, - clone_report, - launch_report, - #list_report_pages, - #list_report_visuals, - #list_report_bookmarks, - #translate_report_titles, - report_rebind, - report_rebind_all -] \ No newline at end of file + "create_report_from_reportjson", + "update_report_from_reportjson", + "get_report_json", + # report_dependency_tree, + "export_report", + "clone_report", + "launch_report", + # list_report_pages, + # list_report_visuals, + # list_report_bookmarks, + # translate_report_titles, + "report_rebind", + "report_rebind_all", +] diff --git a/sempy_labs/report/_report_functions.py b/sempy_labs/report/_report_functions.py index 5b9093da..22153f8b 100644 --- a/sempy_labs/report/_report_functions.py +++ b/sempy_labs/report/_report_functions.py @@ -14,7 +14,7 @@ resolve_dataset_name, resolve_report_id, resolve_lakehouse_name, - language_validate + language_validate, ) from typing import List, Optional, Union from sempy._utils._log import log From 2036adaae712eaa5145b0180b66905dd94928e7e Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 11 Jun 2024 12:59:30 +0000 Subject: [PATCH 19/23] refactor workspace name & id lookup TOM wrapper add to docs --- pyproject.toml | 40 + sempy_labs/__init__.py | 3 + sempy_labs/_ai.py | 1 + sempy_labs/_dax.py | 5 +- sempy_labs/_generate_semantic_model.py | 19 +- sempy_labs/_helper_functions.py | 81 +- sempy_labs/_list_functions.py | 98 +- sempy_labs/_one_lake_integration.py | 15 +- sempy_labs/_refresh_semantic_model.py | 15 +- sempy_labs/_tom.py | 7136 ++++++++--------- .../directlake/_directlake_schema_compare.py | 1 + .../directlake/_get_directlake_lakehouse.py | 29 +- 
.../directlake/_get_shared_expression.py | 11 +- ...e_directlake_model_lakehouse_connection.py | 11 +- .../_update_directlake_partition_entity.py | 7 +- sempy_labs/directlake/_warm_cache.py | 24 +- .../lakehouse/_get_lakehouse_columns.py | 7 +- sempy_labs/lakehouse/_get_lakehouse_tables.py | 13 +- sempy_labs/lakehouse/_lakehouse.py | 1 + sempy_labs/lakehouse/_shortcuts.py | 40 +- .../_migrate_calctables_to_lakehouse.py | 6 +- .../_migrate_calctables_to_semantic_model.py | 6 +- ...igrate_tables_columns_to_semantic_model.py | 6 +- sempy_labs/migration/_migration_validation.py | 5 +- sempy_labs/migration/_refresh_calc_tables.py | 4 - sempy_labs/report/_generate_report.py | 15 +- sempy_labs/report/_report_functions.py | 37 +- setup.py | 26 - tests/test_tom.py | 31 + 29 files changed, 3704 insertions(+), 3989 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py create mode 100644 tests/test_tom.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..f44c9dd4 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name="semantic-link-labs" +authors = [ + { name = "Microsoft Corporation" }, +] +version="0.4.1" +description="Semantic Link Labs project" +requires-python=">=3.10,<3.12" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", + "Framework :: Jupyter" +] +license= { text = "MIT License" } + +dependencies = [ + "semantic-link-sempy>=0.7.5", + "anytree", + "powerbiclient" +] + +[project.optional-dependencies] +test = [ + "pytest>=8.2.1", +] + +[project.urls] +Repository = "https://github.com/microsoft/semantic-link-labs" + +[[tool.mypy.overrides]] +module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*" +ignore_missing_imports = true \ No newline at end of file diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py index bacd52fe..9a6a5d86 100644 --- a/sempy_labs/__init__.py +++ b/sempy_labs/__init__.py @@ -82,6 +82,7 @@ # visualize_vertipaq, import_vertipaq_analyzer, ) +from sempy_labs._tom import TOMWrapper, connect_semantic_model __all__ = [ "clear_cache", @@ -148,4 +149,6 @@ "vertipaq_analyzer", #'visualize_vertipaq', "import_vertipaq_analyzer", + "TOMWrapper", + "connect_semantic_model", ] diff --git a/sempy_labs/_ai.py b/sempy_labs/_ai.py index 0f253156..48638e27 100644 --- a/sempy_labs/_ai.py +++ b/sempy_labs/_ai.py @@ -5,6 +5,7 @@ from pyspark.sql.functions import col from pyspark.sql import SparkSession from typing import List, Optional, Union +from IPython.display import display def optimize_semantic_model(dataset: str, workspace: Optional[str] = None): diff --git a/sempy_labs/_dax.py b/sempy_labs/_dax.py index 25976be9..75b29f94 100644 --- a/sempy_labs/_dax.py +++ b/sempy_labs/_dax.py @@ -14,7 +14,10 @@ def run_dax( workspace: Optional[str] = None, ): """ - Runs a DAX query against a semantic model. + Runs a DAX query against a semantic model using the `REST API `_. + + Compared to evaluate_dax this allows passing the user name for impersonation. + Note that the REST API has significant limitations compared to the XMLA endpoint. 
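For context on the reworded run_dax docstring, a hedged usage sketch follows. The dataset and workspace parameters appear in the hunk; the dax_string and user_name parameter names are assumptions for illustration only, so check the actual signature in _dax.py before relying on them.

from sempy_labs import run_dax

# Hypothetical model, query, and user values. user_name is the impersonation
# identity the REST execute-queries endpoint supports, which the XMLA-based
# evaluate_dax path does not.
df = run_dax(
    dataset="AdventureWorks",
    dax_string="EVALUATE VALUES('Date'[Calendar Year])",  # assumed parameter name
    user_name="analyst@contoso.com",                      # assumed parameter name
    workspace="Sales Analytics",
)
print(df.head())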
Parameters ---------- diff --git a/sempy_labs/_generate_semantic_model.py b/sempy_labs/_generate_semantic_model.py index 91974cc5..c7b79d00 100644 --- a/sempy_labs/_generate_semantic_model.py +++ b/sempy_labs/_generate_semantic_model.py @@ -3,14 +3,17 @@ import pandas as pd import json, base64, time, os from typing import List, Optional, Union -from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) from sempy_labs.lakehouse._lakehouse import lakehouse_attached import sempy_labs._icons as icons def create_blank_semantic_model( dataset: str, - compatibility_level: Optional[int] = 1605, + compatibility_level: int = 1605, workspace: Optional[str] = None, ): """ @@ -80,11 +83,7 @@ def create_semantic_model_from_bim( or if no lakehouse attached, resolves to the workspace of the notebook. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) objectType = "SemanticModel" @@ -228,11 +227,7 @@ def get_semantic_model_bim( The Model.bim file for the semantic model. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) objType = "SemanticModel" client = fabric.FabricRestClient() diff --git a/sempy_labs/_helper_functions.py b/sempy_labs/_helper_functions.py index bde6e333..e1c13edb 100644 --- a/sempy_labs/_helper_functions.py +++ b/sempy_labs/_helper_functions.py @@ -1,9 +1,8 @@ -import sempy import sempy.fabric as fabric import re import pandas as pd from pyspark.sql import SparkSession -from typing import Optional +from typing import Optional, Tuple from uuid import UUID import sempy_labs._icons as icons @@ -32,15 +31,15 @@ def create_abfss_path( return f"abfss://{lakehouse_workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{delta_table_name}" -def format_dax_object_name(a: str, b: str): +def format_dax_object_name(table: str, column: str): """ Formats a table/column combination to the 'Table Name'[Column Name] format. Parameters ---------- - a : str + table : str The name of the table. - b : str + column : str The name of the column. Returns @@ -49,7 +48,7 @@ def format_dax_object_name(a: str, b: str): The fully qualified object name. 
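The parameter rename above (a and b become table and column) is purely cosmetic; the return statement in the next hunk is unchanged apart from the names. A quick example of the output it produces:

from sempy_labs import format_dax_object_name

# Builds the 'Table Name'[Column Name] form used in DAX references.
print(format_dax_object_name("Internet Sales", "Sales Amount"))
# prints: 'Internet Sales'[Sales Amount]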
""" - return "'" + a + "'[" + b + "]" + return "'" + table + "'[" + column + "]" def create_relationship_name( @@ -107,11 +106,6 @@ def resolve_report_id(report: str, workspace: Optional[str] = None): obj = fabric.resolve_item_id(item_name=report, type="Report", workspace=workspace) - # objectType = 'Report' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Display Name'] == report)] - # obj = dfI_filt['Id'].iloc[0] - return obj @@ -142,11 +136,6 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): item_id=report_id, type="Report", workspace=workspace ) - # objectType = 'Report' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Id'] == report_id)] - # obj = dfI_filt['Display Name'].iloc[0] - return obj @@ -177,11 +166,6 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): item_name=dataset, type="SemanticModel", workspace=workspace ) - # objectType = 'SemanticModel' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Display Name'] == dataset)] - # obj = dfI_filt['Id'].iloc[0] - return obj @@ -212,11 +196,6 @@ def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): item_id=dataset_id, type="SemanticModel", workspace=workspace ) - # objectType = 'SemanticModel' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Id'] == dataset_id)] - # obj = dfI_filt['Display Name'].iloc[0] - return obj @@ -247,16 +226,6 @@ def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): item_id=lakehouse_id, type="Lakehouse", workspace=workspace ) - # objectType = 'Lakehouse' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Id'] == lakehouse_id)] - - # if len(dfI_filt) == 0: - # print(f"The '{lakehouse_id}' Lakehouse Id does not exist within the '{workspace}' workspace.") - # return - - # obj = dfI_filt['Display Name'].iloc[0] - return obj @@ -287,20 +256,10 @@ def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): item_name=lakehouse, type="Lakehouse", workspace=workspace ) - # objectType = 'Lakehouse' - # dfI = fabric.list_items(workspace = workspace, type = objectType) - # dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] - - # if len(dfI_filt) == 0: - # print(f"The '{lakehouse}' lakehouse does not exist within the '{workspace}' workspace.") - # return - - # obj = dfI_filt['Id'].iloc[0] - return obj -def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): +def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None) -> UUID: """ Obtains the SQL Endpoint ID of the semantic model. @@ -315,7 +274,7 @@ def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): Returns ------- - UUID + uuid.UUID The ID of SQL Endpoint. """ @@ -519,3 +478,29 @@ def language_validate(language: str): return return lang + + +def resolve_workspace_name_and_id(workspace: Optional[str] = None) -> Tuple[str, str]: + """ + Obtains the name and ID of the Fabric workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str, str + The name and ID of the Fabric workspace. 
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + return workspace, workspace_id diff --git a/sempy_labs/_list_functions.py b/sempy_labs/_list_functions.py index 13ed07c4..941dd7e4 100644 --- a/sempy_labs/_list_functions.py +++ b/sempy_labs/_list_functions.py @@ -1,9 +1,9 @@ -import sempy import sempy.fabric as fabric +from sempy_labs._helper_functions import resolve_workspace_name_and_id import pandas as pd import json, time from pyspark.sql import SparkSession -from typing import List, Optional, Union +from typing import Optional def get_object_level_security(dataset: str, workspace: Optional[str] = None): @@ -198,8 +198,8 @@ def list_annotations(dataset: str, workspace: Optional[str] = None): pName = p.Name objectType = "Partition" for pa in p.Annotations: - paName = paName - paValue = paValue + paName = pa.Name + paValue = pa.Value new_data = { "Object Name": pName, "Parent Object Name": tName, @@ -393,7 +393,7 @@ def list_columns( if isDirectLake: dfC["Column Cardinality"] = None sql_statements = [] - lakeID, lakeName = get_direct_lake_lakehouse( + (lakeID, lakeName) = get_direct_lake_lakehouse( dataset=dataset, workspace=workspace, lakehouse=lakehouse, @@ -545,11 +545,7 @@ def list_lakehouses(workspace: Optional[str] = None): ] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/") @@ -609,11 +605,7 @@ def list_warehouses(workspace: Optional[str] = None): ] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/warehouses/") @@ -659,11 +651,7 @@ def list_sqlendpoints(workspace: Optional[str] = None): df = pd.DataFrame(columns=["SQL Endpoint ID", "SQL Endpoint Name", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/sqlEndpoints/") @@ -704,11 +692,7 @@ def list_mirroredwarehouses(workspace: Optional[str] = None): columns=["Mirrored Warehouse", "Mirrored Warehouse ID", "Description"] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mirroredWarehouses/") @@ -757,11 +741,7 @@ def list_kqldatabases(workspace: Optional[str] = None): ] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, 
workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlDatabases/") @@ -809,11 +789,7 @@ def list_kqlquerysets(workspace: Optional[str] = None): df = pd.DataFrame(columns=["KQL Queryset Name", "KQL Queryset ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlQuerysets/") @@ -852,11 +828,7 @@ def list_mlmodels(workspace: Optional[str] = None): df = pd.DataFrame(columns=["ML Model Name", "ML Model ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlModels/") @@ -895,11 +867,7 @@ def list_eventstreams(workspace: Optional[str] = None): df = pd.DataFrame(columns=["Eventstream Name", "Eventstream ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/eventstreams/") @@ -938,11 +906,7 @@ def list_datapipelines(workspace: Optional[str] = None): df = pd.DataFrame(columns=["Data Pipeline Name", "Data Pipeline ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/dataPipelines/") @@ -981,11 +945,7 @@ def list_mlexperiments(workspace: Optional[str] = None): df = pd.DataFrame(columns=["ML Experiment Name", "ML Experiment ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlExperiments/") @@ -1024,11 +984,7 @@ def list_datamarts(workspace: Optional[str] = None): df = pd.DataFrame(columns=["Datamart Name", "Datamart ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/datamarts/") @@ -1070,11 +1026,7 @@ def create_warehouse( """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) 
= resolve_workspace_name_and_id(workspace) if description == None: request_body = {"displayName": warehouse} @@ -1132,17 +1084,9 @@ def update_item( The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) itemTypes = { "DataPipeline": "dataPipelines", @@ -1404,11 +1348,7 @@ def list_workspace_role_assignments(workspace: Optional[str] = None): A pandas dataframe showing the members of a given workspace and their roles. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) df = pd.DataFrame(columns=["User Name", "User Email", "Role Name", "Type"]) diff --git a/sempy_labs/_one_lake_integration.py b/sempy_labs/_one_lake_integration.py index d73d104d..cf22e4f7 100644 --- a/sempy_labs/_one_lake_integration.py +++ b/sempy_labs/_one_lake_integration.py @@ -1,8 +1,8 @@ -import sempy import sempy.fabric as fabric import pandas as pd -from typing import List, Optional, Union +from typing import Optional from sempy._utils._log import log +from sempy_labs._helper_functions import resolve_workspace_name_and_id @log @@ -27,18 +27,9 @@ def export_model_to_onelake( The name of the Fabric lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created. destination_workspace : str, default=None The name of the Fabric workspace in which the lakehouse resides. - - - Returns - ------- - """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if destination_workspace == None: destination_workspace = workspace diff --git a/sempy_labs/_refresh_semantic_model.py b/sempy_labs/_refresh_semantic_model.py index 06334698..a6b0fd95 100644 --- a/sempy_labs/_refresh_semantic_model.py +++ b/sempy_labs/_refresh_semantic_model.py @@ -5,6 +5,7 @@ from typing import List, Optional, Union from sempy._utils._log import log import sempy_labs._icons as icons +from sempy_labs._helper_functions import resolve_workspace_name_and_id @log @@ -38,10 +39,6 @@ def refresh_semantic_model( The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ if workspace == None: @@ -157,17 +154,9 @@ def cancel_dataset_refresh( The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
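The list_* helpers refactored above all share the same shape: resolve the workspace, then GET the collection endpoint with FabricRestClient. A hedged sketch of that shared pattern; the "value", "displayName", and "id" field names follow the Fabric REST list-response schema rather than code shown in these hunks.

import sempy.fabric as fabric
from sempy_labs._helper_functions import resolve_workspace_name_and_id

(workspace, workspace_id) = resolve_workspace_name_and_id(None)
client = fabric.FabricRestClient()
response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/")
for item in response.json().get("value", []):
    print(item.get("displayName"), item.get("id"))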
- - Returns - ------- - """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) rr = fabric.list_refresh_requests(dataset=dataset, workspace=workspace) rr_filt = rr[rr["Status"] == "Unknown"] diff --git a/sempy_labs/_tom.py b/sempy_labs/_tom.py index 44592251..8f3d1fcf 100644 --- a/sempy_labs/_tom.py +++ b/sempy_labs/_tom.py @@ -14,6 +14,7 @@ if TYPE_CHECKING: import Microsoft.AnalysisServices.Tabular + import Microsoft.AnalysisServices.Tabular as TOM checked = "\u2611" @@ -22,3902 +23,3741 @@ end_bold = "\033[0m" -@log -@contextmanager -def connect_semantic_model( - dataset: str, readonly: Optional[bool] = True, workspace: Optional[str] = None -): +class TOMWrapper: """ - Connects to the Tabular Object Model (TOM) within a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - readonly: bool, default=True - Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - str - A connection to the semantic model's Tabular Object Model. + Convenience wrapper around the TOM object model for a semantic model. Always use connect_semantic_model function to make sure the TOM object is initialized correctly. """ - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - import System - - if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - fpAdded = [] - - class TOMWrapper: - - def __init__(self, dataset, workspace, readonly): - - tom_server = fabric.create_tom_server( - readonly=readonly, workspace=workspace - ) - self.model = tom_server.Databases.GetByName(dataset).Model - - def all_columns(self): - """ - Outputs a list of all columns within all tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Column] - All columns within the semantic model. - """ - - for t in self.model.Tables: - for c in t.Columns: - if c.Type != TOM.ColumnType.RowNumber: - yield c - - def all_calculated_columns(self): - """ - Outputs a list of all calculated columns within all tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Column] - All calculated columns within the semantic model. - """ - - for t in self.model.Tables: - for c in t.Columns: - if c.Type == TOM.ColumnType.Calculated: - yield c - - def all_calculated_tables(self): - """ - Outputs a list of all calculated tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Table] - All calculated tables within the semantic model. - """ - - for t in self.model.Tables: - if any(p.SourceType == TOM.ColumnType.Calculated for p in t.Partitions): - yield t - - def all_calculation_groups(self): - """ - Outputs a list of all calculation groups in the semantic model. 
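The _tom.py refactor above promotes the nested wrapper to a module-level TOMWrapper class whose docstring directs callers to connect_semantic_model. A read-only usage sketch, assuming connect_semantic_model keeps the context-manager form of the version being replaced; the dataset and workspace names are hypothetical:

from sempy_labs import connect_semantic_model

# readonly=True inspects the model without committing changes back to the service.
with connect_semantic_model(
    dataset="AdventureWorks", readonly=True, workspace="Sales Analytics"
) as tom:
    for m in tom.all_measures():
        print(m.Name, m.Expression)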
- - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Table] - All calculation groups within the semantic model. - """ - - for t in self.model.Tables: - if t.CalculationGroup is not None: - yield t + dataset: str + workspace: str + readonly: bool + tables_added: List[str] + + def __init__(self, dataset, workspace, readonly): + self.dataset = dataset + self.workspace = workspace + self.readonly = readonly + self.tables_added = [] + + self.tom_server = fabric.create_tom_server( + readonly=readonly, workspace=workspace + ) + self.model = self.tom_server.Databases.GetByName(dataset).Model + + def all_columns(self): + """ + Outputs a list of all columns within all tables in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Column] + All columns within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + for c in t.Columns: + if c.Type != TOM.ColumnType.RowNumber: + yield c - def all_measures(self): - """ - Outputs a list of all measures in the semantic model. + def all_calculated_columns(self): + """ + Outputs a list of all calculated columns within all tables in the semantic model. - Parameters - ---------- + Parameters + ---------- - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Measure] - All measures within the semantic model. - """ + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Column] + All calculated columns within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM - for t in self.model.Tables: - for m in t.Measures: - yield m + for t in self.model.Tables: + for c in t.Columns: + if c.Type == TOM.ColumnType.Calculated: + yield c - def all_partitions(self): - """ - Outputs a list of all partitions in the semantic model. + def all_calculated_tables(self): + """ + Outputs a list of all calculated tables in the semantic model. - Parameters - ---------- + Parameters + ---------- - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Partition] - All partitions within the semantic model. - """ + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Table] + All calculated tables within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM - for t in self.model.Tables: - for p in t.Partitions: - yield p + for t in self.model.Tables: + if any(p.SourceType == TOM.ColumnType.Calculated for p in t.Partitions): + yield t - def all_hierarchies(self): - """ - Outputs a list of all hierarchies in the semantic model. + def all_calculation_groups(self): + """ + Outputs a list of all calculation groups in the semantic model. - Parameters - ---------- + Parameters + ---------- - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Hierarchy] - All hierarchies within the semantic model. - """ + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Table] + All calculation groups within the semantic model. + """ - for t in self.model.Tables: - for h in t.Hierarchies: - yield h + for t in self.model.Tables: + if t.CalculationGroup is not None: + yield t - def all_levels(self): - """ - Outputs a list of all levels in the semantic model. + def all_measures(self): + """ + Outputs a list of all measures in the semantic model. - Parameters - ---------- + Parameters + ---------- - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Level] - All levels within the semantic model. 
- """ + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Measure] + All measures within the semantic model. + """ - for t in self.model.Tables: - for h in t.Hierarchies: - for l in h.Levels: - yield l + for t in self.model.Tables: + for m in t.Measures: + yield m - def all_calculation_items(self): - """ - Outputs a list of all calculation items in the semantic model. + def all_partitions(self): + """ + Outputs a list of all partitions in the semantic model. - Parameters - ---------- + Parameters + ---------- - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.CalculationItem] - All calculation items within the semantic model. - """ + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Partition] + All partitions within the semantic model. + """ - for t in self.model.Tables: - if t.CalculationGroup is not None: - for ci in t.CalculationGroup.CalculationItems: - yield ci - - def all_rls(self): - """ - Outputs a list of all row level security expressions in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.TablePermission] - All row level security expressions within the semantic model. - """ - - for r in self.model.Roles: - for tp in r.TablePermissions: - yield tp - - def add_measure( - self, - table_name: str, - measure_name: str, - expression: str, - format_string: Optional[str] = None, - hidden: Optional[bool] = False, - description: Optional[str] = None, - display_folder: Optional[str] = None, - ): - """ - Adds a measure to the semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the measure will be created. - measure_name : str - Name of the measure. - expression : str - DAX expression of the measure. - format_string : str, default=None - Format string of the measure. - hidden : bool, default=False - Whether the measure will be hidden or visible. - description : str, default=None - A description of the measure. - display_folder : str, default=None - The display folder in which the measure will reside. - - Returns - ------- - - """ - - obj = TOM.Measure() - obj.Name = measure_name - obj.Expression = expression - obj.IsHidden = hidden - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - - self.model.Tables[table_name].Measures.Add(obj) - - def add_calculated_table_column( - self, - table_name: str, - column_name: str, - source_column: str, - data_type: str, - format_string: Optional[str] = None, - hidden: Optional[bool] = False, - description: Optional[str] = None, - display_folder: Optional[str] = None, - data_category: Optional[str] = None, - key: Optional[bool] = False, - summarize_by: Optional[str] = None, - ): - """ - Adds a calculated table column to a calculated table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - source_column : str - The source column for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. 
- data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. - summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. - - Returns - ------- - - """ - - data_type = ( - data_type.capitalize() - .replace("Integer", "Int64") - .replace("Datetime", "DateTime") - ) - if summarize_by is None: - summarize_by = "Default" - summarize_by = ( - summarize_by.capitalize() - .replace("Distinctcount", "DistinctCount") - .replace("Avg", "Average") + for t in self.model.Tables: + for p in t.Partitions: + yield p + + def all_hierarchies(self): + """ + Outputs a list of all hierarchies in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Hierarchy] + All hierarchies within the semantic model. + """ + + for t in self.model.Tables: + for h in t.Hierarchies: + yield h + + def all_levels(self): + """ + Outputs a list of all levels in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Level] + All levels within the semantic model. + """ + + for t in self.model.Tables: + for h in t.Hierarchies: + for l in h.Levels: + yield l + + def all_calculation_items(self): + """ + Outputs a list of all calculation items in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.CalculationItem] + All calculation items within the semantic model. + """ + + for t in self.model.Tables: + if t.CalculationGroup is not None: + for ci in t.CalculationGroup.CalculationItems: + yield ci + + def all_rls(self): + """ + Outputs a list of all row level security expressions in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.TablePermission] + All row level security expressions within the semantic model. + """ + + for r in self.model.Roles: + for tp in r.TablePermissions: + yield tp + + def add_measure( + self, + table_name: str, + measure_name: str, + expression: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + ): + """ + Adds a measure to the semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the measure will be created. + measure_name : str + Name of the measure. + expression : str + DAX expression of the measure. + format_string : str, default=None + Format string of the measure. + hidden : bool, default=False + Whether the measure will be hidden or visible. + description : str, default=None + A description of the measure. + display_folder : str, default=None + The display folder in which the measure will reside. 
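A usage sketch for the add_measure method whose signature and docstring appear just above (the body follows in the next hunk). Model, table, and column names are hypothetical, and readonly=False is assumed so the change can be saved back:

from sempy_labs import connect_semantic_model

with connect_semantic_model(dataset="AdventureWorks", readonly=False) as tom:
    tom.add_measure(
        table_name="Internet Sales",
        measure_name="Total Sales",
        expression="SUM('Internet Sales'[Sales Amount])",
        format_string="#,0.00",
        display_folder="KPIs",
    )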
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + obj = TOM.Measure() + obj.Name = measure_name + obj.Expression = expression + obj.IsHidden = hidden + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + + self.model.Tables[table_name].Measures.Add(obj) + + def add_calculated_table_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a calculated table column to a calculated table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + source_column : str + The source column for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. + description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.CalculatedTableColumn() + obj.Name = column_name + obj.SourceColumn = source_column + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsHidden = hidden + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_data_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a data column to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + source_column : str + The source column for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. 
+ description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.DataColumn() + obj.Name = column_name + obj.SourceColumn = source_column + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsHidden = hidden + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_calculated_column( + self, + table_name: str, + column_name: str, + expression: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a calculated column to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + expression : str + The DAX expression for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. + description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. 
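For the add_data_column signature documented above, a hedged sketch with hypothetical names; the body that follows normalizes data_type (for example, "Integer" becomes "Int64") and parses summarize_by into the TOM AggregateFunction enum:

from sempy_labs import connect_semantic_model

with connect_semantic_model(dataset="AdventureWorks", readonly=False) as tom:
    tom.add_data_column(
        table_name="Internet Sales",
        column_name="Order Year",
        source_column="OrderYear",
        data_type="Int64",    # accepted as-is; "Integer" would also be normalized to Int64
        hidden=False,
        summarize_by="None",  # parsed into the TOM AggregateFunction enum
    )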
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.CalculatedColumn() + obj.Name = column_name + obj.Expression = expression + obj.IsHidden = hidden + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_calculation_item( + self, + table_name: str, + calculation_item_name: str, + expression: str, + ordinal: Optional[int] = None, + format_string_expression: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds a calculation item to a calculation group within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the calculation item will be created. + calculation_item_name : str + Name of the calculation item. + expression : str + The DAX expression for the calculation item. + ordinal : int, default=None + The ordinal of the calculation item. + format_string_expression : str, default=None + The format string expression for the calculation item. + description : str, default=None + A description of the calculation item. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + obj = TOM.CalculationItem() + fsd = TOM.FormatStringDefinition() + obj.Name = calculation_item_name + obj.Expression = expression + if ordinal is not None: + obj.Ordinal = ordinal + if description is not None: + obj.Description = description + if format_string_expression is not None: + obj.FormatStringDefinition = fsd.Expression = format_string_expression + self.model.Tables[table_name].CalculationGroup.CalculationItems.Add(obj) + + def add_role( + self, + role_name: str, + model_permission: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds a role to a semantic model. + + Parameters + ---------- + role_name : str + Name of the role. + model_permission : str, default=None + The model permission for the role. + Defaults to None which resolves to 'Read'. + description : str, default=None + A description of the role. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if model_permission is None: + model_permission = "Read" + + obj = TOM.ModelRole() + obj.Name = role_name + obj.ModelPermission = System.Enum.Parse(TOM.ModelPermission, model_permission) + if description is not None: + obj.Description = description + self.model.Roles.Add(obj) + + def set_rls(self, role_name: str, table_name: str, filter_expression: str): + """ + Sets the row level security permissions for a table within a role. + + Parameters + ---------- + role_name : str + Name of the role. + table_name : str + Name of the table. + filter_expression : str + The DAX expression containing the row level security filter expression logic. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + tp = TOM.TablePermission() + tp.Table = self.model.Tables[table_name] + tp.FilterExpression = filter_expression + + try: + self.model.Roles[role_name].TablePermissions[ + table_name + ].FilterExpression = filter_expression + except: + self.model.Roles[role_name].TablePermissions.Add(tp) + + def set_ols( + self, role_name: str, table_name: str, column_name: str, permission: str + ): + """ + Sets the object level security permissions for a column within a role. + + Parameters + ---------- + role_name : str + Name of the role. + table_name : str + Name of the table. + column_name : str + Name of the column. + permission : str + The object level security permission for the column. + `Valid values `_ + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + permission = permission.capitalize() + + if permission not in ["Read", "None", "Default"]: + print(f"ERROR! Invalid 'permission' value.") + return + + cp = TOM.ColumnPermission() + cp.Column = self.model.Tables[table_name].Columns[column_name] + cp.MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + try: + self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions[ + column_name + ].MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + except: + self.model.Roles[role_name].TablePermissions[ + table_name + ].ColumnPermissions.Add(cp) + + def add_hierarchy( + self, + table_name: str, + hierarchy_name: str, + columns: List[str], + levels: Optional[List[str]] = None, + hierarchy_description: Optional[str] = None, + hierarchy_hidden: Optional[bool] = False, + ): + """ + Adds a hierarchy to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + hierarchy_name : str + Name of the hierarchy. + columns : List[str] + Names of the columns to use within the hierarchy. + levels : List[str], default=None + Names of the levels to use within the hierarhcy (instead of the column names). + hierarchy_description : str, default=None + A description of the hierarchy. + hierarchy_hidden : bool, default=False + Whether the hierarchy is visible or hidden. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + if isinstance(columns, str): + print( + f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']" ) + return + if len(columns) == 1: + print(f"There must be at least 2 levels in order to create a hierarchy.") + return - obj = TOM.CalculatedTableColumn() - obj.Name = column_name - obj.SourceColumn = source_column - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsHidden = hidden - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_data_column( - self, - table_name: str, - column_name: str, - source_column: str, - data_type: str, - format_string: Optional[str] = None, - hidden: Optional[bool] = False, - description: Optional[str] = None, - display_folder: Optional[str] = None, - data_category: Optional[str] = None, - key: Optional[bool] = False, - summarize_by: Optional[str] = None, - ): - """ - Adds a data column to a table within a semantic model. 
- - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - source_column : str - The source column for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. - data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. - summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. - - Returns - ------- - - """ - - data_type = ( - data_type.capitalize() - .replace("Integer", "Int64") - .replace("Datetime", "DateTime") - ) - if summarize_by is None: - summarize_by = "Default" - summarize_by = ( - summarize_by.capitalize() - .replace("Distinctcount", "DistinctCount") - .replace("Avg", "Average") - ) + if levels is None: + levels = columns - obj = TOM.DataColumn() - obj.Name = column_name - obj.SourceColumn = source_column - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsHidden = hidden - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_calculated_column( - self, - table_name: str, - column_name: str, - expression: str, - data_type: str, - format_string: Optional[str] = None, - hidden: Optional[bool] = False, - description: Optional[str] = None, - display_folder: Optional[str] = None, - data_category: Optional[str] = None, - key: Optional[bool] = False, - summarize_by: Optional[str] = None, - ): - """ - Adds a calculated column to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - expression : str - The DAX expression for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. - data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. - summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. - - Returns - ------- - - """ - - data_type = ( - data_type.capitalize() - .replace("Integer", "Int64") - .replace("Datetime", "DateTime") - ) - if summarize_by is None: - summarize_by = "Default" - summarize_by = ( - summarize_by.capitalize() - .replace("Distinctcount", "DistinctCount") - .replace("Avg", "Average") + if len(columns) != len(levels): + print( + f"If specifying level names, you must specify a level for each column." 
) - - obj = TOM.CalculatedColumn() - obj.Name = column_name - obj.Expression = expression - obj.IsHidden = hidden - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_calculation_item( - self, - table_name: str, - calculation_item_name: str, - expression: str, - ordinal: Optional[int] = None, - format_string_expression: Optional[str] = None, - description: Optional[str] = None, - ): - """ - Adds a calculation item to a calculation group within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the calculation item will be created. - calculation_item_name : str - Name of the calculation item. - expression : str - The DAX expression for the calculation item. - ordinal : int, default=None - The ordinal of the calculation item. - format_string_expression : str, default=None - The format string expression for the calculation item. - description : str, default=None - A description of the calculation item. - - Returns - ------- - - """ - - obj = TOM.CalculationItem() - fsd = TOM.FormatStringDefinition() - obj.Name = calculation_item_name - obj.Expression = expression - if ordinal is not None: - obj.Ordinal = ordinal - if description is not None: - obj.Description = description - if format_string_expression is not None: - obj.FormatStringDefinition = fsd.Expression = format_string_expression - self.model.Tables[table_name].CalculationGroup.CalculationItems.Add(obj) - - def add_role( - self, - role_name: str, - model_permission: Optional[str] = None, - description: Optional[str] = None, - ): - """ - Adds a role to a semantic model. - - Parameters - ---------- - role_name : str - Name of the role. - model_permission : str, default=None - The model permission for the role. - Defaults to None which resolves to 'Read'. - description : str, default=None - A description of the role. - - Returns - ------- - - """ - - if model_permission is None: - model_permission = "Read" - - obj = TOM.ModelRole() - obj.Name = role_name - obj.ModelPermission = System.Enum.Parse( - TOM.ModelPermission, model_permission + return + + obj = TOM.Hierarchy() + obj.Name = hierarchy_name + obj.IsHidden = hierarchy_hidden + if hierarchy_description is not None: + obj.Description = hierarchy_description + self.model.Tables[table_name].Hierarchies.Add(obj) + + for col in columns: + lvl = TOM.Level() + lvl.Column = self.model.Tables[table_name].Columns[col] + lvl.Name = levels[columns.index(col)] + lvl.Ordinal = columns.index(col) + self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add(lvl) + + def add_relationship( + self, + from_table: str, + from_column: str, + to_table: str, + to_column: str, + from_cardinality: str, + to_cardinality: str, + cross_filtering_behavior: Optional[str] = None, + is_active: Optional[bool] = True, + security_filtering_behavior: Optional[str] = None, + rely_on_referential_integrity: Optional[bool] = False, + ): + """ + Adds a relationship to a semantic model. + + Parameters + ---------- + from_table : str + Name of the table on the 'from' side of the relationship. 
+ from_column : str + Name of the column on the 'from' side of the relationship. + to_table : str + Name of the table on the 'to' side of the relationship. + to_column : str + Name of the column on the 'to' side of the relationship. + from_cardinality : str + The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. + to_cardinality : str + The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. + cross_filtering_behavior : str, default=None + Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). + Defaults to None which resolves to 'Automatic'. + is_active : bool, default=True + Setting for whether the relationship is active or not. + security_filtering_behavior : str, default=None + Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). + Defaults to None which resolves to 'OneDirection'. + rely_on_referential_integrity : bool, default=False + Setting for the rely on referential integrity of the relationship. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if cross_filtering_behavior is None: + cross_filtering_behavior = "Automatic" + if security_filtering_behavior is None: + security_filtering_behavior = "OneDirection" + + from_cardinality = from_cardinality.capitalize() + to_cardinality = to_cardinality.capitalize() + cross_filtering_behavior = cross_filtering_behavior.capitalize() + security_filtering_behavior = security_filtering_behavior.capitalize() + security_filtering_behavior = security_filtering_behavior.replace( + "direct", "Direct" + ) + cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct") + + rel = TOM.SingleColumnRelationship() + rel.FromColumn = self.model.Tables[from_table].Columns[from_column] + rel.FromCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, from_cardinality + ) + rel.ToColumn = self.model.Tables[to_table].Columns[to_column] + rel.ToCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, to_cardinality + ) + rel.IsActive = is_active + rel.CrossFilteringBehavior = System.Enum.Parse( + TOM.CrossFilteringBehavior, cross_filtering_behavior + ) + rel.SecurityFilteringBehavior = System.Enum.Parse( + TOM.SecurityFilteringBehavior, security_filtering_behavior + ) + rel.RelyOnReferentialIntegrity = rely_on_referential_integrity + + self.model.Relationships.Add(rel) + + def add_calculation_group( + self, + name: str, + precedence: int, + description: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a calculation group to a semantic model. + + Parameters + ---------- + name : str + Name of the calculation group. + precedence : int + The precedence of the calculation group. + description : str, default=None + A description of the calculation group. + hidden : bool, default=False + Whether the calculation group is hidden/visible. 
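+
+        Examples
+        --------
+        An illustrative call, assuming ``tom`` is a connected instance of this
+        class; the calculation group name is a placeholder::
+
+            tom.add_calculation_group(name="Time Intelligence", precedence=1)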
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + tbl = TOM.Table() + tbl.Name = name + tbl.CalculationGroup = TOM.CalculationGroup() + tbl.CalculationGroup.Precedence = precedence + tbl.IsHidden = hidden + if description is not None: + tbl.Description = description + + part = TOM.Partition() + part.Name = name + part.Source = TOM.CalculationGroupSource() + tbl.Partitions.Add(part) + + sortCol = "Ordinal" + + col1 = TOM.DataColumn() + col1.Name = sortCol + col1.SourceColumn = sortCol + col1.IsHidden = True + col1.DataType = System.Enum.Parse(TOM.DataType, "Int64") + + tbl.Columns.Add(col1) + + col2 = TOM.DataColumn() + col2.Name = "Name" + col2.SourceColumn = "Name" + col2.DataType = System.Enum.Parse(TOM.DataType, "String") + # col.SortByColumn = m.Tables[name].Columns[sortCol] + tbl.Columns.Add(col2) + + self.model.DiscourageImplicitMeasures = True + self.model.Tables.Add(tbl) + + def add_expression( + self, name: str, expression: str, description: Optional[str] = None + ): + """ + Adds an expression to a semantic model. + + Parameters + ---------- + name : str + Name of the expression. + expression: str + The M expression of the expression. + description : str, default=None + A description of the expression. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + exp = TOM.NamedExpression() + exp.Name = name + if description is not None: + exp.Description = description + exp.Kind = TOM.ExpressionKind.M + exp.Expression = expression + + self.model.Expressions.Add(exp) + + def add_translation(self, language: str): + """ + Adds a translation language (culture) to a semantic model. + + Parameters + ---------- + language : str + The language code (i.e. 'it-IT' for Italian). + """ + import Microsoft.AnalysisServices.Tabular as TOM + + cul = TOM.Culture() + cul.Name = language + + try: + self.model.Cultures.Add(cul) + except: + pass + + def add_perspective(self, perspective_name: str): + """ + Adds a perspective to a semantic model. + + Parameters + ---------- + perspective_name : str + Name of the perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + persp = TOM.Perspective() + persp.Name = perspective_name + self.model.Perspectives.Add(persp) + + def add_m_partition( + self, + table_name: str, + partition_name: str, + expression: str, + mode: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds an M-partition to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + partition_name : str + Name of the partition. + expression : str + The M expression encapsulating the logic for the partition. + mode : str, default=None + The query mode for the partition. + Defaults to None which resolves to 'Import'. + description : str, default=None + A description for the partition. 
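+
+        Examples
+        --------
+        An illustrative call, assuming ``tom`` is a connected instance of this
+        class; the table, partition and M query names are placeholders::
+
+            tom.add_m_partition(
+                table_name="Sales",
+                partition_name="Sales2024",
+                expression="let Source = Sales_Staging in Source",
+                mode="Import",
+            )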
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + mode = ( + mode.title() + .replace("query", "Query") + .replace(" ", "") + .replace("lake", "Lake") + ) + + mp = TOM.MPartitionSource() + mp.Expression = expression + p = TOM.Partition() + p.Name = partition_name + p.Source = mp + if description is not None: + p.Description = description + if mode is None: + mode = "Default" + p.Mode = System.Enum.Parse(TOM.ModeType, mode) + + self.model.Tables[table_name].Partitions.Add(p) + + def add_entity_partition( + self, + table_name: str, + entity_name: str, + expression: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds an entity partition to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + entity_name : str + Name of the lakehouse table. + expression : TOM Object, default=None + The expression used by the table. + Defaults to None which resolves to the 'DatabaseQuery' expression. + description : str, default=None + A description for the partition. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + ep = TOM.EntityPartitionSource() + ep.Name = table_name + ep.EntityName = entity_name + if expression is None: + ep.ExpressionSource = self.model.Expressions["DatabaseQuery"] + else: + ep.ExpressionSource = expression + p = TOM.Partition() + p.Name = table_name + p.Source = ep + p.Mode = TOM.ModeType.DirectLake + if description is not None: + p.Description = description + + self.model.Tables[table_name].Partitions.Add(p) + + def set_alternate_of( + self, + table_name: str, + column_name: str, + summarization_type: str, + base_table: str, + base_column: Optional[str] = None, + ): + """ + Sets the 'alternate of' property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + summarization_type : str + The summarization type for the column. + `Valid values `_ + base_table : str + Name of the base table for aggregation. + base_column : str + Name of the base column for aggregation + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if base_column is not None and base_table is None: + print( + f"ERROR: If you specify the base table you must also specify the base column" ) - if description is not None: - obj.Description = description - self.model.Roles.Add(obj) - def set_rls(self, role_name: str, table_name: str, filter_expression: str): - """ - Sets the row level security permissions for a table within a role. + summarization_type = ( + summarization_type.replace(" ", "") + .capitalize() + .replace("Groupby", "GroupBy") + ) - Parameters - ---------- - role_name : str - Name of the role. - table_name : str - Name of the table. - filter_expression : str - The DAX expression containing the row level security filter expression logic. - - Returns - ------- - - """ - - tp = TOM.TablePermission() - tp.Table = self.model.Tables[table_name] - tp.FilterExpression = filter_expression - - try: - self.model.Roles[role_name].TablePermissions[ - table_name - ].FilterExpression = filter_expression - except: - self.model.Roles[role_name].TablePermissions.Add(tp) - - def set_ols( - self, role_name: str, table_name: str, column_name: str, permission: str - ): - """ - Sets the object level security permissions for a column within a role. - - Parameters - ---------- - role_name : str - Name of the role. - table_name : str - Name of the table. - column_name : str - Name of the column. 
- permission : str - The object level security permission for the column. - `Valid values `_ - - Returns - ------- - - """ - - permission = permission.capitalize() - - if permission not in ["Read", "None", "Default"]: - print(f"ERROR! Invalid 'permission' value.") - return - - cp = TOM.ColumnPermission() - cp.Column = self.model.Tables[table_name].Columns[column_name] - cp.MetadataPermission = System.Enum.Parse( - TOM.MetadataPermission, permission + summarizationTypes = ["Sum", "GroupBy", "Count", "Min", "Max"] + if summarization_type not in summarizationTypes: + print( + f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}." ) - try: - self.model.Roles[role_name].TablePermissions[ - table_name - ].ColumnPermissions[column_name].MetadataPermission = System.Enum.Parse( - TOM.MetadataPermission, permission - ) - except: - self.model.Roles[role_name].TablePermissions[ - table_name - ].ColumnPermissions.Add(cp) - - def add_hierarchy( - self, - table_name: str, - hierarchy_name: str, - columns: List[str], - levels: Optional[List[str]] = None, - hierarchy_description: Optional[str] = None, - hierarchy_hidden: Optional[bool] = False, - ): - """ - Adds a hierarchy to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - hierarchy_name : str - Name of the hierarchy. - columns : List[str] - Names of the columns to use within the hierarchy. - levels : List[str], default=None - Names of the levels to use within the hierarhcy (instead of the column names). - hierarchy_description : str, default=None - A description of the hierarchy. - hierarchy_hidden : bool, default=False - Whether the hierarchy is visible or hidden. - - Returns - ------- - - """ - - if isinstance(columns, str): - print( - f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']" - ) - return - if len(columns) == 1: - print( - f"There must be at least 2 levels in order to create a hierarchy." - ) - return - - if levels is None: - levels = columns - - if len(columns) != len(levels): - print( - f"If specifying level names, you must specify a level for each column." - ) - return + return + + ao = TOM.AlternateOf() + ao.Summarization = System.Enum.Parse(TOM.SummarizationType, summarization_type) + if base_column is not None: + ao.BaseColumn = self.model.Tables[base_table].Columns[base_column] + else: + ao.BaseTable = self.model.Tables[base_table] + + self.model.Tables[table_name].Columns[column_name].AlternateOf = ao + + # Hide agg table and columns + t = self.model.Tables[table_name] + t.IsHidden = True + for c in t.Columns: + c.IsHidden = True + + def remove_alternate_of(self, table_name: str, column_name: str): + """ + Removes the 'alternate of' property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + + Returns + ------- + + """ + + self.model.Tables[table_name].Columns[column_name].AlternateOf = None + + def get_annotations( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.Annotation": + """ + Shows all annotations for a given object within a semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.Annotation + TOM objects of all the annotations on a particular object within the semantic model. 
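+
+        Examples
+        --------
+        An illustrative loop over the annotations of a table, assuming ``tom`` is
+        a connected instance of this class and a 'Sales' table exists::
+
+            for ann in tom.get_annotations(tom.model.Tables["Sales"]):
+                print(ann.Name, ann.Value)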
+ """ + + # df = pd.DataFrame(columns=['Name', 'Value']) + + for a in object.Annotations: + # new_data = {'Name': a.Name, 'Value': a.Value} + yield a + # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + def set_annotation(self, object, name: str, value: str): + """ + Sets an annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + value : str + Value of the annotation. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + ann = TOM.Annotation() + ann.Name = name + ann.Value = value + + try: + object.Annotations[name].Value = value + except: + object.Annotations.Add(ann) + + def get_annotation_value(self, object, name: str): + """ + Obtains the annotation value for a given annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + + Returns + ------- + str + The annotation value. + """ + + return object.Annotations[name].Value + + def remove_annotation(self, object, name: str): + """ + Removes an annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + """ + + object.Annotations.Remove(name) + + def clear_annotations(self, object): + """ + Removes all annotations on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + """ + + object.Annotations.Clear() + + def get_extended_properties( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.ExtendedProperty": + """ + Retrieves all extended properties on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.ExtendedPropertiesCollection + TOM Objects of all the extended properties. + """ + + for a in object.ExtendedProperties: + yield a + + def set_extended_property( + self, object, extended_property_type: str, name: str, value: str + ): + """ + Sets an extended property on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + extended_property_type : str + The extended property type. + `Valid values `_ + name : str + Name of the extended property. + value : str + Value of the extended property. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + extended_property_type = extended_property_type.title() + + if extended_property_type == "Json": + ep = TOM.JsonExtendedProperty() + else: + ep = TOM.StringExtendedProperty() + + ep.Name = name + ep.Value = value + + try: + object.ExtendedProperties[name].Value = value + except: + object.ExtendedProperties.Add(ep) + + def get_extended_property_value(self, object, name: str): + """ + Retrieves the value of an extended property for an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + + Returns + ------- + str + The extended property value. 
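+
+        Examples
+        --------
+        An illustrative call, assuming ``tom`` is a connected instance of this
+        class; the table and extended property names are placeholders::
+
+            value = tom.get_extended_property_value(
+                object=tom.model.Tables["Sales"], name="MyProperty"
+            )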
+ """ + + return object.ExtendedProperties[name].Value + + def remove_extended_property(self, object, name: str): + """ + Removes an extended property on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + """ + + object.ExtendedProperties.Remove(name) + + def clear_extended_properties(self, object): + """ + Removes all extended properties on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + """ + + object.ExtendedProperties.Clear() + + def in_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Indicates whether an object is contained within a given perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspecitve_name : str + Name of the perspective. + + Returns + ------- + bool + An indication as to whether the object is contained within the given perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." + ) + return - obj = TOM.Hierarchy() - obj.Name = hierarchy_name - obj.IsHidden = hierarchy_hidden - if hierarchy_description is not None: - obj.Description = hierarchy_description - self.model.Tables[table_name].Hierarchies.Add(obj) - - for col in columns: - lvl = TOM.Level() - lvl.Column = self.model.Tables[table_name].Columns[col] - lvl.Name = levels[columns.index(col)] - lvl.Ordinal = columns.index(col) - self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add( - lvl - ) + object.Model.Perspectives[perspective_name] - def add_relationship( - self, - from_table: str, - from_column: str, - to_table: str, - to_column: str, - from_cardinality: str, - to_cardinality: str, - cross_filtering_behavior: Optional[str] = None, - is_active: Optional[bool] = True, - security_filtering_behavior: Optional[str] = None, - rely_on_referential_integrity: Optional[bool] = False, - ): - """ - Adds a relationship to a semantic model. - - Parameters - ---------- - from_table : str - Name of the table on the 'from' side of the relationship. - from_column : str - Name of the column on the 'from' side of the relationship. - to_table : str - Name of the table on the 'to' side of the relationship. - to_column : str - Name of the column on the 'to' side of the relationship. - from_cardinality : str - The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. - to_cardinality : str - The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. - cross_filtering_behavior : str, default=None - Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). - Defaults to None which resolves to 'Automatic'. - is_active : bool, default=True - Setting for whether the relationship is active or not. - security_filtering_behavior : str, default=None - Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). 
- Defaults to None which resolves to 'OneDirection'. - rely_on_referential_integrity : bool, default=False - Setting for the rely on referential integrity of the relationship. - - Returns - ------- - - """ - - if cross_filtering_behavior is None: - cross_filtering_behavior = "Automatic" - if security_filtering_behavior is None: - security_filtering_behavior = "OneDirection" - - from_cardinality = from_cardinality.capitalize() - to_cardinality = to_cardinality.capitalize() - cross_filtering_behavior = cross_filtering_behavior.capitalize() - security_filtering_behavior = security_filtering_behavior.capitalize() - security_filtering_behavior = security_filtering_behavior.replace( - "direct", "Direct" + try: + if objectType == TOM.ObjectType.Table: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] + elif objectType == TOM.ObjectType.Column: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns[object.Name] + elif objectType == TOM.ObjectType.Measure: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures[object.Name] + elif objectType == TOM.ObjectType.Hierarchy: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies[object.Name] + return True + except: + return False + + def add_to_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Adds an object to a perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspective_name : str + Name of the perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." ) - cross_filtering_behavior = cross_filtering_behavior.replace( - "direct", "Direct" + return + try: + object.Model.Perspectives[perspective_name] + except: + print(f"The '{perspective_name}' perspective does not exist.") + return + + # try: + if objectType == TOM.ObjectType.Table: + pt = TOM.PerspectiveTable() + pt.Table = object + object.Model.Perspectives[perspective_name].PerspectiveTables.Add(pt) + elif objectType == TOM.ObjectType.Column: + pc = TOM.PerspectiveColumn() + pc.Column = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Add(pc) + elif objectType == TOM.ObjectType.Measure: + pm = TOM.PerspectiveMeasure() + pm.Measure = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Add(pm) + elif objectType == TOM.ObjectType.Hierarchy: + ph = TOM.PerspectiveHierarchy() + ph.Hierarchy = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Add(ph) + # except: + # pass + + def remove_from_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Removes an object from a perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspective_name : str + Name of the perspective. 
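+
+        Examples
+        --------
+        An illustrative call, assuming ``tom`` is a connected instance of this
+        class; the table, measure and perspective names are placeholders::
+
+            tom.remove_from_perspective(
+                object=tom.model.Tables["Sales"].Measures["Sales Amount"],
+                perspective_name="Finance",
+            )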
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." ) - - rel = TOM.SingleColumnRelationship() - rel.FromColumn = self.model.Tables[from_table].Columns[from_column] - rel.FromCardinality = System.Enum.Parse( - TOM.RelationshipEndCardinality, from_cardinality + return + try: + object.Model.Perspectives[perspective_name] + except: + print(f"The '{perspective_name}' perspective does not exist.") + return + + # try: + if objectType == TOM.ObjectType.Table: + pt = object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] + object.Model.Perspectives[perspective_name].PerspectiveTables.Remove(pt) + elif objectType == TOM.ObjectType.Column: + pc = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveColumns[object.Name] ) - rel.ToColumn = self.model.Tables[to_table].Columns[to_column] - rel.ToCardinality = System.Enum.Parse( - TOM.RelationshipEndCardinality, to_cardinality + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Remove(pc) + elif objectType == TOM.ObjectType.Measure: + pm = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveMeasures[object.Name] ) - rel.IsActive = is_active - rel.CrossFilteringBehavior = System.Enum.Parse( - TOM.CrossFilteringBehavior, cross_filtering_behavior + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Remove(pm) + elif objectType == TOM.ObjectType.Hierarchy: + ph = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveHierarchies[object.Name] ) - rel.SecurityFilteringBehavior = System.Enum.Parse( - TOM.SecurityFilteringBehavior, security_filtering_behavior + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Remove(ph) + # except: + # pass + + def set_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + property: str, + value: str, + ): + """ + Sets a translation value for an object's property. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + language : str + The language code. + property : str + The property to set. Options: 'Name', 'Description', 'Display Folder'. + value : str + The transation value. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + self.add_translation(language=language) + + property = property.title() + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] # , 'Level' + + if object.ObjectType not in validObjects: + print(f"Translations can only be set to {validObjects}.") + return + + mapping = { + "Name": TOM.TranslatedProperty.Caption, + "Description": TOM.TranslatedProperty.Description, + "Display Folder": TOM.TranslatedProperty.DisplayFolder, + } + + prop = mapping.get(property) + + try: + object.Model.Cultures[language] + except: + print( + f"The '{language}' translation language does not exist in the semantic model." 
) - rel.RelyOnReferentialIntegrity = rely_on_referential_integrity - - self.model.Relationships.Add(rel) - - def add_calculation_group( - self, - name: str, - precedence: int, - description: Optional[str] = None, - hidden: Optional[bool] = False, - ): - """ - Adds a calculation group to a semantic model. - - Parameters - ---------- - name : str - Name of the calculation group. - precedence : int - The precedence of the calculation group. - description : str, default=None - A description of the calculation group. - hidden : bool, default=False - Whether the calculation group is hidden/visible. - - Returns - ------- - - """ - - tbl = TOM.Table() - tbl.Name = name - tbl.CalculationGroup = TOM.CalculationGroup() - tbl.CalculationGroup.Precedence = precedence - tbl.IsHidden = hidden - if description is not None: - tbl.Description = description - - part = TOM.Partition() - part.Name = name - part.Source = TOM.CalculationGroupSource() - tbl.Partitions.Add(part) - - sortCol = "Ordinal" - - col1 = TOM.DataColumn() - col1.Name = sortCol - col1.SourceColumn = sortCol - col1.IsHidden = True - col1.DataType = System.Enum.Parse(TOM.DataType, "Int64") - - tbl.Columns.Add(col1) - - col2 = TOM.DataColumn() - col2.Name = "Name" - col2.SourceColumn = "Name" - col2.DataType = System.Enum.Parse(TOM.DataType, "String") - # col.SortByColumn = m.Tables[name].Columns[sortCol] - tbl.Columns.Add(col2) - - self.model.DiscourageImplicitMeasures = True - self.model.Tables.Add(tbl) - - def add_expression( - self, name: str, expression: str, description: Optional[str] = None - ): - """ - Adds an expression to a semantic model. - - Parameters - ---------- - name : str - Name of the expression. - expression: str - The M expression of the expression. - description : str, default=None - A description of the expression. - - Returns - ------- - - """ - - exp = TOM.NamedExpression() - exp.Name = name - if description is not None: - exp.Description = description - exp.Kind = TOM.ExpressionKind.M - exp.Expression = expression - - self.model.Expressions.Add(exp) - - def add_translation(self, language: str): - """ - Adds a translation language (culture) to a semantic model. - - Parameters - ---------- - language : str - The language code (i.e. 'it-IT' for Italian). + return + + object.Model.Cultures[language].ObjectTranslations.SetTranslation( + object, prop, value + ) + + def remove_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + ): + """ + Removes an object's translation value. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + language : str + The language code. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + o = object.Model.Cultures[language].ObjectTranslations[ + object, TOM.TranslatedProperty.Caption + ] + object.Model.Cultures[language].ObjectTranslations.Remove(o) + + def remove_object(self, object): + """ + Removes an object from a semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + # Have to remove translations and perspectives on the object before removing it. 
+        if objType in [
+            TOM.ObjectType.Table,
+            TOM.ObjectType.Column,
+            TOM.ObjectType.Measure,
+            TOM.ObjectType.Hierarchy,
+            TOM.ObjectType.Level,
+        ]:
+            for lang in object.Model.Cultures:
+                try:
+                    self.remove_translation(object=object, language=lang.Name)
+                except:
+                    pass
+        if objType in [
+            TOM.ObjectType.Table,
+            TOM.ObjectType.Column,
+            TOM.ObjectType.Measure,
+            TOM.ObjectType.Hierarchy,
+        ]:
+            for persp in object.Model.Perspectives:
+                try:
+                    self.remove_from_perspective(
+                        object=object, perspective_name=persp.Name
+                    )
+                except:
+                    pass
+
+        if objType == TOM.ObjectType.Column:
+            object.Parent.Columns.Remove(object.Name)
+        elif objType == TOM.ObjectType.Measure:
+            object.Parent.Measures.Remove(object.Name)
+        elif objType == TOM.ObjectType.Hierarchy:
+            object.Parent.Hierarchies.Remove(object.Name)
+        elif objType == TOM.ObjectType.Level:
+            object.Parent.Levels.Remove(object.Name)
+        elif objType == TOM.ObjectType.Partition:
+            object.Parent.Partitions.Remove(object.Name)
+        elif objType == TOM.ObjectType.Expression:
+            object.Parent.Expressions.Remove(object.Name)
+        elif objType == TOM.ObjectType.DataSource:
+            object.Parent.DataSources.Remove(object.Name)
+        elif objType == TOM.ObjectType.Role:
+            object.Parent.Roles.Remove(object.Name)
+        elif objType == TOM.ObjectType.Relationship:
+            object.Parent.Relationships.Remove(object.Name)
+        elif objType == TOM.ObjectType.Culture:
+            object.Parent.Cultures.Remove(object.Name)
+        elif objType == TOM.ObjectType.Perspective:
+            object.Parent.Perspectives.Remove(object.Name)
+        elif objType == TOM.ObjectType.CalculationItem:
+            object.Parent.CalculationItems.Remove(object.Name)
+        elif objType == TOM.ObjectType.TablePermission:
+            object.Parent.TablePermissions.Remove(object.Name)
+
+    def used_in_relationships(self, object: Union["TOM.Table", "TOM.Column"]):
+        """
+        Shows all relationships in which a table/column is used.
+
+        Parameters
+        ----------
+        object : TOM Object
+            An object (i.e. table/column) within a semantic model.
+
+        Returns
+        -------
+        Microsoft.AnalysisServices.Tabular.RelationshipCollection
+            All relationships in which the table/column is used.
+        """
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        objType = object.ObjectType
+
+        if objType == TOM.ObjectType.Table:
+            for r in self.model.Relationships:
+                if r.FromTable.Name == object.Name or r.ToTable.Name == object.Name:
+                    yield r  # , 'Table'
+        elif objType == TOM.ObjectType.Column:
+            for r in self.model.Relationships:
+                if (
+                    r.FromTable.Name == object.Parent.Name
+                    and r.FromColumn.Name == object.Name
+                ) or (
+                    r.ToTable.Name == object.Parent.Name
+                    and r.ToColumn.Name == object.Name
+                ):
+                    yield r  # , 'Column'
+
+    def used_in_levels(self, column: "TOM.Column"):
+        """
+        Shows all levels in which a column is used.
+
+        Parameters
+        ----------
+        column : TOM Object
+            A column object within a semantic model.
+
+        Returns
+        -------
+        Microsoft.AnalysisServices.Tabular.LevelCollection
+            All levels in which the column is used.
+        """
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        objType = column.ObjectType
+
+        if objType == TOM.ObjectType.Column:
+            for l in self.all_levels():
+                if (
+                    l.Parent.Table.Name == column.Parent.Name
+                    and l.Column.Name == column.Name
+                ):
+                    yield l
+
+    def used_in_hierarchies(self, column: "TOM.Column"):
+        """
+        Shows all hierarchies in which a column is used.
+
+        Parameters
+        ----------
+        column : TOM Object
+            A column object within a semantic model.
+
+        Returns
+        -------
+        Microsoft.AnalysisServices.Tabular.HierarchyCollection
+            All hierarchies in which the column is used.
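+
+        Examples
+        --------
+        An illustrative loop, assuming ``tom`` is a connected instance of this
+        class; the table and column names are placeholders::
+
+            col = tom.model.Tables["Geography"].Columns["City"]
+            for h in tom.used_in_hierarchies(column=col):
+                print(h.Name)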
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + for l in self.all_levels(): + if ( + l.Parent.Table.Name == column.Parent.Name + and l.Column.Name == column.Name + ): + yield l.Parent - Returns - ------- + def used_in_sort_by(self, column: "TOM.Column"): + """ + Shows all columns in which a column is used for sorting. - """ + Parameters + ---------- + object : TOM Object + An column object within a semantic model. - cul = TOM.Culture() - cul.Name = language + Returns + ------- + Microsoft.AnalysisServices.Tabular.ColumnCollection + All columns in which the column is used for sorting. + """ + import Microsoft.AnalysisServices.Tabular as TOM - try: - self.model.Cultures.Add(cul) - except: - pass + objType = column.ObjectType - def add_perspective(self, perspective_name: str): - """ - Adds a perspective to a semantic model. + if objType == TOM.ObjectType.Column: + for c in self.model.Tables[column.Parent.Name].Columns: + if c.SortByColumn == column: + yield c - Parameters - ---------- - perspective_name : str - Name of the perspective. + def used_in_rls( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the filter expressions which reference a given object. - Returns - ------- + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - """ + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - persp = TOM.Perspective() - persp.Name = perspective_name - self.model.Perspectives.Add(persp) + """ + import Microsoft.AnalysisServices.Tabular as TOM - def add_m_partition( - self, - table_name: str, - partition_name: str, - expression: str, - mode: Optional[str] = None, - description: Optional[str] = None, - ): - """ - Adds an M-partition to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - partition_name : str - Name of the partition. - expression : str - The M expression encapsulating the logic for the partition. - mode : str, default=None - The query mode for the partition. - Defaults to None which resolves to 'Import'. - description : str, default=None - A description for the partition. - - Returns - ------- - - """ - - mode = ( - mode.title() - .replace("query", "Query") - .replace(" ", "") - .replace("lake", "Lake") - ) + objType = object.ObjectType - mp = TOM.MPartitionSource() - mp.Expression = expression - p = TOM.Partition() - p.Name = partition_name - p.Source = mp - if description is not None: - p.Description = description - if mode is None: - mode = "Default" - p.Mode = System.Enum.Parse(TOM.ModeType, mode) - - self.model.Tables[table_name].Partitions.Add(p) - - def add_entity_partition( - self, - table_name: str, - entity_name: str, - expression: Optional[str] = None, - description: Optional[str] = None, - ): - """ - Adds an entity partition to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - entity_name : str - Name of the lakehouse table. - expression : TOM Object, default=None - The expression used by the table. - Defaults to None which resolves to the 'DatabaseQuery' expression. 
- description : str, default=None - A description for the partition. - - Returns - ------- - - """ - - ep = TOM.EntityPartitionSource() - ep.Name = table_name - ep.EntityName = entity_name - if expression is None: - ep.ExpressionSource = self.model.Expressions["DatabaseQuery"] - else: - ep.ExpressionSource = expression - p = TOM.Partition() - p.Name = table_name - p.Source = ep - p.Mode = TOM.ModeType.DirectLake - if description is not None: - p.Description = description - - self.model.Tables[table_name].Partitions.Add(p) - - def set_alternate_of( - self, - table_name: str, - column_name: str, - summarization_type: str, - base_table: str, - base_column: Optional[str] = None, - ): - """ - Sets the 'alternate of' property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - summarization_type : str - The summarization type for the column. - `Valid values `_ - base_table : str - Name of the base table for aggregation. - base_column : str - Name of the base column for aggregation - - Returns - ------- - - """ - - if base_column is not None and base_table is None: - print( - f"ERROR: If you specify the base table you must also specify the base column" - ) + df_filt = dependencies[dependencies["Object Type"] == "Rows Allowed"] - summarization_type = ( - summarization_type.replace(" ", "") - .capitalize() - .replace("Groupby", "GroupBy") - ) + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m - summarizationTypes = ["Sum", "GroupBy", "Count", "Min", "Max"] - if summarization_type not in summarizationTypes: - print( - f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}." - ) - return + def used_in_data_coverage_definition( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the ... which reference a given object. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
+ + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + df_filt = dependencies[ + dependencies["Object Type"] == "Data Coverage Definition" + ] + + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m - ao = TOM.AlternateOf() - ao.Summarization = System.Enum.Parse( - TOM.SummarizationType, summarization_type - ) - if base_column is not None: - ao.BaseColumn = self.model.Tables[base_table].Columns[base_column] - else: - ao.BaseTable = self.model.Tables[base_table] + def used_in_calc_item( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the ... which reference a given object. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + df_filt = dependencies[dependencies["Object Type"] == "Calculation Item"] + + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m - self.model.Tables[table_name].Columns[column_name].AlternateOf = ao + def hybrid_tables(self): + """ + Outputs the hybrid tables within a semantic model. 
- # Hide agg table and columns - t = self.model.Tables[table_name] - t.IsHidden = True - for c in t.Columns: - c.IsHidden = True + Parameters + ---------- - def remove_alternate_of(self, table_name: str, column_name: str): - """ - Removes the 'alternate of' property on a column. + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection + All hybrid tables within a semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. + for t in self.model.Tables: + if any(p.Mode == TOM.ModeType.Import for p in t.Partitions): + if any(p.Mode == TOM.ModeType.DirectQuery for p in t.Partitions): + yield t - Returns - ------- + def date_tables(self): + """ + Outputs the tables which are marked as date tables within a semantic model. - """ + Parameters + ---------- - self.model.Tables[table_name].Columns[column_name].AlternateOf = None + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection + All tables marked as date tables within a semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM - def get_annotations( - self, object - ) -> "Microsoft.AnalysisServices.Tabular.Annotation": - """ - Shows all annotations for a given object within a semantic model. + for t in self.model.Tables: + if t.DataCategory == "Time": + if any( + c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns + ): + yield t - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. + def is_hybrid_table(self, table_name: str): + """ + Identifies if a table is a hybrid table. - Returns - ------- - Microsoft.AnalysisServices.Tabular.Annotation - TOM objects of all the annotations on a particular object within the semantic model. - """ + Parameters + ---------- + table_name : str + Name of the table. - # df = pd.DataFrame(columns=['Name', 'Value']) + Returns + ------- + bool + Indicates if the table is a hybrid table. + """ + import Microsoft.AnalysisServices.Tabular as TOM - for a in object.Annotations: - # new_data = {'Name': a.Name, 'Value': a.Value} - yield a - # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + isHybridTable = False - def set_annotation(self, object, name: str, value: str): - """ - Sets an annotation on an object within the semantic model. + if any( + p.Mode == TOM.ModeType.Import + for p in self.model.Tables[table_name].Partitions + ): + if any( + p.Mode == TOM.ModeType.DirectQuery + for p in self.model.Tables[table_name].Partitions + ): + isHybridTable = True + + return isHybridTable + + def is_date_table(self, table_name: str): + """ + Identifies if a table is marked as a date table. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table is marked as a date table. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + isDateTable = False + t = self.model.Tables[table_name] + + if t.DataCategory == "Time": + if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): + isDateTable = True + + return isDateTable + + def mark_as_date_table(self, table_name: str, column_name: str): + """ + Marks a table as a date table. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the date column in the table. 
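+
+        Examples
+        --------
+        An illustrative call, assuming ``tom`` is a connected instance of this
+        class and the 'Calendar' table has a contiguous 'Date' column::
+
+            tom.mark_as_date_table(table_name="Calendar", column_name="Date")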
+        """
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        t = self.model.Tables[table_name]
+        c = t.Columns[column_name]
+        if c.DataType != TOM.DataType.DateTime:
+            print(
+                f"{icons.red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type."
+            )
+            return
+
+        daxQuery = f"""
+        define measure '{table_name}'[test] =
+        var mn = MIN('{table_name}'[{column_name}])
+        var ma = MAX('{table_name}'[{column_name}])
+        var x = COUNTROWS(DISTINCT('{table_name}'[{column_name}]))
+        var y = DATEDIFF(mn, ma, DAY) + 1
+        return if(y = x, 1,0)
+
+        EVALUATE
+        SUMMARIZECOLUMNS(
+        "1",[test]
+        )
+        """
+        df = fabric.evaluate_dax(
+            dataset=self.dataset, workspace=self.workspace, dax_string=daxQuery
+        )
+        value = df["1"].iloc[0]
+        if value != "1":
+            print(
+                f"{icons.red_dot} The '{column_name}' column within the '{table_name}' table does not contain contiguous date values."
+            )
+            return

-        Parameters
-        ----------
-        object : TOM Object
-            An object (i.e. table/column/measure) within a semantic model.
-        name : str
-            Name of the annotation.
-        value : str
-            Value of the annotation.
+        # Mark as a date table; c is already the Column object, so set IsKey directly
+        t.DataCategory = "Time"
+        c.IsKey = True
+        print(
+            f"{icons.green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key."
+        )

-        Returns
-        -------
+    def has_aggs(self):
+        """
+        Identifies if a semantic model has any aggregations.

-        """
+        Parameters
+        ----------

-        ann = TOM.Annotation()
-        ann.Name = name
-        ann.Value = value
+        Returns
+        -------
+        bool
+            Indicates if the semantic model has any aggregations.
+        """

-        try:
-            object.Annotations[name].Value = value
-        except:
-            object.Annotations.Add(ann)
+        hasAggs = False

-    def get_annotation_value(self, object, name: str):
-        """
-        Obtains the annotation value for a given annotation on an object within the semantic model.
+        for c in self.all_columns():
+            if c.AlternateOf is not None:
+                hasAggs = True

-        Parameters
-        ----------
-        object : TOM Object
-            An object (i.e. table/column/measure) within a semantic model.
-        name : str
-            Name of the annotation.
+        return hasAggs

-        Returns
-        -------
-        str
-            The annotation value.
-        """
+    def is_agg_table(self, table_name: str):
+        """
+        Identifies if a table has aggregations.

-        return object.Annotations[name].Value
+        Parameters
+        ----------
+        table_name : str
+            Name of the table.

-    def remove_annotation(self, object, name: str):
-        """
-        Removes an annotation on an object within the semantic model.
+        Returns
+        -------
+        bool
+            Indicates if the table has any aggregations.
+        """

-        Parameters
-        ----------
-        object : TOM Object
-            An object (i.e. table/column/measure) within a semantic model.
-        name : str
-            Name of the annotation.
+        t = self.model.Tables[table_name]

-        Returns
-        -------
+        return any(c.AlternateOf is not None for c in t.Columns)

-        """
+    def has_hybrid_table(self):
+        """
+        Identifies if a semantic model has a hybrid table.

-        object.Annotations.Remove(name)
+        Parameters
+        ----------

-    def clear_annotations(self, object):
-        """
-        Removes all annotations on an object within the semantic model.
+        Returns
+        -------
+        bool
+            Indicates if the semantic model has a hybrid table.
+        """

-        Parameters
-        ----------
-        object : TOM Object
-            An object (i.e. table/column/measure) within a semantic model.
+ hasHybridTable = False - Returns - ------- + for t in self.model.Tables: + if self.is_hybrid_table(table_name=t.Name): + hasHybridTable = True - """ + return hasHybridTable - object.Annotations.Clear() + def has_date_table(self): + """ + Identifies if a semantic model has a table marked as a date table. - def get_extended_properties( - self, object - ) -> "Microsoft.AnalysisServices.Tabular.ExtendedProperty": - """ - Retrieves all extended properties on an object within the semantic model. + Parameters + ---------- - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. + Returns + ------- + bool + Indicates if the semantic model has a table marked as a date table. + """ - Returns - ------- - Microsoft.AnalysisServices.Tabular.ExtendedPropertiesCollection - TOM Objects of all the extended properties. - """ + hasDateTable = False - # df = pd.DataFrame(columns=['Name', 'Value', 'Type']) + for t in self.model.Tables: + if self.is_date_table(table_name=t.Name): + hasDateTable = True - for a in object.ExtendedProperties: - yield a - # new_data = {'Name': a.Name, 'Value': a.Value, 'Type': a.Type} - # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + return hasDateTable - # return df + def is_direct_lake(self): + """ + Identifies if a semantic model is in Direct Lake mode. - def set_extended_property( - self, object, extended_property_type: str, name: str, value: str - ): - """ - Sets an extended property on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - extended_property_type : str - The extended property type. - `Valid values `_ - name : str - Name of the extended property. - value : str - Value of the extended property. - - Returns - ------- - - """ - - extended_property_type = extended_property_type.title() - - if extended_property_type == "Json": - ep = TOM.JsonExtendedProperty() - else: - ep = TOM.StringExtendedProperty() + Parameters + ---------- - ep.Name = name - ep.Value = value + Returns + ------- + bool + Indicates if the semantic model is in Direct Lake mode. + """ - try: - object.ExtendedProperties[name].Value = value - except: - object.ExtendedProperties.Add(ep) + return any( + p.Mode == TOM.ModeType.DirectLake + for t in self.model.Tables + for p in t.Partitions + ) - def get_extended_property_value(self, object, name: str): - """ - Retrieves the value of an extended property for an object within the semantic model. + def is_field_parameter(self, table_name: str): + """ + Identifies if a table is a field parameter. - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. + Parameters + ---------- + table_name : str + Name of the table. - Returns - ------- - str - The extended property value. - """ + Returns + ------- + bool + Indicates if the table is a field parameter. + """ + import Microsoft.AnalysisServices.Tabular as TOM - return object.ExtendedProperties[name].Value + t = self.model.Tables[table_name] - def remove_extended_property(self, object, name: str): - """ - Removes an extended property on an object within the semantic model. 
+ return ( + any( + p.SourceType == TOM.PartitionSourceType.Calculated + and "NAMEOF(" in p.Source.Expression + for p in t.Partitions + ) + and all( + "[Value" in c.SourceColumn + for c in t.Columns + if c.Type != TOM.ColumnType.RowNumber + ) + and t.Columns.Count == 4 + ) - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. + def is_auto_date_table(self, table_name: str): + """ + Identifies if a table is an auto-date table. - Returns - ------- + Parameters + ---------- + table_name : str + Name of the table. - """ + Returns + ------- + bool + Indicates if the table is an auto-date table. + """ + import Microsoft.AnalysisServices.Tabular as TOM - object.ExtendedProperties.Remove(name) + isAutoDate = False - def clear_extended_properties(self, object): - """ - Removes all extended properties on an object within the semantic model. + t = self.model.Tables[table_name] - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. + if t.Name.startswith("LocalDateTable_") or t.Name.startswith( + "DateTableTemplate_" + ): + if any( + p.SourceType == TOM.PartitionSourceType.Calculated for p in t.Partitions + ): + isAutoDate = True + + return isAutoDate + + def set_kpi( + self, + measure_name: str, + target: Union[int, float, str], + lower_bound: float, + upper_bound: float, + lower_mid_bound: Optional[float] = None, + upper_mid_bound: Optional[float] = None, + status_type: Optional[str] = None, + status_graphic: Optional[str] = None, + ): + """ + Sets the properties to add/update a KPI for a measure. + + Parameters + ---------- + measure_name : str + Name of the measure. + target : str, int, float + The target for the KPI. This can either be a number or the name of a different measure in the semantic model. + lower_bound: float + The lower bound for the KPI. + upper_bound : float + The upper bound for the KPI. + lower_mid_bound : float, default=None + The lower-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. + upper_mid_bound : float, default=None + The upper-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. + status_type : str, default=None + The status type of the KPI. Options: 'Linear', 'LinearReversed', 'Centered', 'CenteredReversed'. + Defaults to None which resolvs to 'Linear'. + status_graphic : str, default=None + The status graphic for the KPI. + Defaults to 'Three Circles Colored'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + # https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md + + if measure_name == target: + print( + f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter." + ) + return - Returns - ------- + if status_graphic is None: + status_graphic = "Three Circles Colored" - """ + statusType = ["Linear", "LinearReversed", "Centered", "CenteredReversed"] + status_type = status_type.title().replace(" ", "") - object.ExtendedProperties.Clear() + if status_type is None: + status_type = "Linear" - def in_perspective( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], - perspective_name: str, - ): - """ - Indicates whether an object is contained within a given perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspecitve_name : str - Name of the perspective. 
- - Returns - ------- - bool - An indication as to whether the object is contained within the given perspective. - """ - - validObjects = [ - TOM.ObjectType.Table, - TOM.ObjectType.Column, - TOM.ObjectType.Measure, - TOM.ObjectType.Hierarchy, - ] - objectType = object.ObjectType + if status_type not in statusType: + print( + f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}." + ) + return - if objectType not in validObjects: + if status_type in ["Linear", "LinearReversed"]: + if upper_bound is not None or lower_mid_bound is not None: print( - f"Only the following object types are valid for perspectives: {validObjects}." + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None." ) return + elif upper_bound <= lower_bound: + print(f"The upper_bound must be greater than the lower_bound.") + return - object.Model.Perspectives[perspective_name] - - try: - if objectType == TOM.ObjectType.Table: - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Name - ] - elif objectType == TOM.ObjectType.Column: - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveColumns[object.Name] - elif objectType == TOM.ObjectType.Measure: - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveMeasures[object.Name] - elif objectType == TOM.ObjectType.Hierarchy: - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveHierarchies[object.Name] - return True - except: - return False - - def add_to_perspective( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], - perspective_name: str, - ): - """ - Adds an object to a perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspective_name : str - Name of the perspective. - - Returns - ------- - - """ - - validObjects = [ - TOM.ObjectType.Table, - TOM.ObjectType.Column, - TOM.ObjectType.Measure, - TOM.ObjectType.Hierarchy, - ] - objectType = object.ObjectType - - if objectType not in validObjects: + if status_type in ["Centered", "CenteredReversed"]: + if upper_mid_bound is None or lower_mid_bound is None: print( - f"Only the following object types are valid for perspectives: {validObjects}." + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types." 
) return - try: - object.Model.Perspectives[perspective_name] - except: - print(f"The '{perspective_name}' perspective does not exist.") - return - - # try: - if objectType == TOM.ObjectType.Table: - pt = TOM.PerspectiveTable() - pt.Table = object - object.Model.Perspectives[perspective_name].PerspectiveTables.Add(pt) - elif objectType == TOM.ObjectType.Column: - pc = TOM.PerspectiveColumn() - pc.Column = object - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveColumns.Add(pc) - elif objectType == TOM.ObjectType.Measure: - pm = TOM.PerspectiveMeasure() - pm.Measure = object - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveMeasures.Add(pm) - elif objectType == TOM.ObjectType.Hierarchy: - ph = TOM.PerspectiveHierarchy() - ph.Hierarchy = object - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveHierarchies.Add(ph) - # except: - # pass - - def remove_from_perspective( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], - perspective_name: str, - ): - """ - Removes an object from a perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspective_name : str - Name of the perspective. - - Returns - ------- - - """ - - validObjects = [ - TOM.ObjectType.Table, - TOM.ObjectType.Column, - TOM.ObjectType.Measure, - TOM.ObjectType.Hierarchy, - ] - objectType = object.ObjectType - - if objectType not in validObjects: - print( - f"Only the following object types are valid for perspectives: {validObjects}." - ) - return - try: - object.Model.Perspectives[perspective_name] - except: - print(f"The '{perspective_name}' perspective does not exist.") - return - - # try: - if objectType == TOM.ObjectType.Table: - pt = object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Name - ] - object.Model.Perspectives[perspective_name].PerspectiveTables.Remove(pt) - elif objectType == TOM.ObjectType.Column: - pc = ( - object.Model.Perspectives[perspective_name] - .PerspectiveTables[object.Parent.Name] - .PerspectiveColumns[object.Name] - ) - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveColumns.Remove(pc) - elif objectType == TOM.ObjectType.Measure: - pm = ( - object.Model.Perspectives[perspective_name] - .PerspectiveTables[object.Parent.Name] - .PerspectiveMeasures[object.Name] - ) - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveMeasures.Remove(pm) - elif objectType == TOM.ObjectType.Hierarchy: - ph = ( - object.Model.Perspectives[perspective_name] - .PerspectiveTables[object.Parent.Name] - .PerspectiveHierarchies[object.Name] - ) - object.Model.Perspectives[perspective_name].PerspectiveTables[ - object.Parent.Name - ].PerspectiveHierarchies.Remove(ph) - # except: - # pass - - def set_translation( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], - language: str, - property: str, - value: str, - ): - """ - Sets a translation value for an object's property. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - language : str - The language code. - property : str - The property to set. Options: 'Name', 'Description', 'Display Folder'. - value : str - The transation value. 
- - Returns - ------- - - """ - - self.add_translation(language=language) - - property = property.title() - - validObjects = [ - TOM.ObjectType.Table, - TOM.ObjectType.Column, - TOM.ObjectType.Measure, - TOM.ObjectType.Hierarchy, - ] # , 'Level' - - if object.ObjectType not in validObjects: - print(f"Translations can only be set to {validObjects}.") - return - - mapping = { - "Name": TOM.TranslatedProperty.Caption, - "Description": TOM.TranslatedProperty.Description, - "Display Folder": TOM.TranslatedProperty.DisplayFolder, - } - - prop = mapping.get(property) - - try: - object.Model.Cultures[language] - except: - print( - f"The '{language}' translation language does not exist in the semantic model." - ) - return - - object.Model.Cultures[language].ObjectTranslations.SetTranslation( - object, prop, value + elif upper_bound <= upper_mid_bound: + print(f"The upper_bound must be greater than the upper_mid_bound.") + elif upper_mid_bound <= lower_mid_bound: + print(f"The upper_mid_bound must be greater than the lower_mid_bound.") + elif lower_mid_bound <= lower_bound: + print(f"The lower_mid_bound must be greater than the lower_bound.") + + try: + table_name = next( + m.Parent.Name for m in self.all_measures() if m.Name == measure_name ) + except: + print( + f"The '{measure_name}' measure does not exist in the '{self.dataset}' semantic model within the '{self.workspace}'." + ) + return + + graphics = [ + "Cylinder", + "Five Bars Colored", + "Five Boxes Colored", + "Gauge - Ascending", + "Gauge - Descending", + "Road Signs", + "Shapes", + "Standard Arrow", + "Three Circles Colored", + "Three Flags Colored", + "Three Stars Colored", + "Three Symbols Uncircled Colored", + "Traffic Light", + "Traffic Light - Single", + "Variance Arrow", + "Status Arrow - Ascending", + "Status Arrow - Descending", + ] + + if status_graphic not in graphics: + print( + f"The '{status_graphic}' status graphic is not valid. Please choose from these options: {graphics}." + ) + return - def remove_translation( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], - language: str, - ): - """ - Removes an object's translation value. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - language : str - The language code. - - Returns - ------- - - """ - - o = object.Model.Cultures[language].ObjectTranslations[ - object, TOM.TranslatedProperty.Caption - ] - object.Model.Cultures[language].ObjectTranslations.Remove(o) - - def remove_object(self, object): - """ - Removes an object from a semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - - Returns - ------- - - """ - - objType = object.ObjectType - - # Have to remove translations and perspectives on the object before removing it. 
- if objType in ["Table", "Column", "Measure", "Hierarchy", "Level"]: - for lang in object.Model.Cultures: - try: - self.remove_translation(object=object, language=lang.Name) - except: - pass - if objType in ["Table", "Column", "Measure", "Hierarchy"]: - for persp in object.Model.Perspectives: - try: - self.remove_from_perspective( - object=object, perspective_name=persp.Name - ) - except: - pass - - if objType == TOM.ObjectType.Column: - object.Parent.Columns.Remove(object.Name) - elif objType == TOM.ObjectType.Measure: - object.Parent.Measures.Remove(object.Name) - elif objType == TOM.ObjectType.Hierarchy: - object.Parent.Hierarchies.Remove(object.Name) - elif objType == TOM.ObjectType.Level: - object.Parent.Levels.Remove(object.Name) - elif objType == TOM.ObjectType.Partition: - object.Parent.Partitions.Remove(object.Name) - elif objType == TOM.ObjectType.Expression: - object.Parent.Expressions.Remove(object.Name) - elif objType == TOM.ObjectType.DataSource: - object.Parent.DataSources.Remove(object.Name) - elif objType == TOM.ObjectType.Role: - object.Parent.Roles.Remove(object.Name) - elif objType == TOM.ObjectType.Relationship: - object.Parent.Relationships.Remove(object.Name) - elif objType == TOM.ObjectType.Culture: - object.Parent.Cultures.Remove(object.Name) - elif objType == TOM.ObjectType.Perspective: - object.Parent.Perspectives.Remove(object.Name) - elif objType == TOM.ObjectType.CalculationItem: - object.Parent.CalculationItems.Remove(object.Name) - elif objType == TOM.ObjectType.TablePermission: - object.Parent.TablePermissions.Remove(object.Name) - - def used_in_relationships(self, object: Union["TOM.Table", "TOM.Column"]): - """ - Shows all relationships in which a table/column is used. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.RelationshipCollection - All relationships in which the table/column is used. - """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Table: - for r in self.model.Relationships: - if r.FromTable.Name == object.Name or r.ToTable.Name == object.Name: - yield r # , 'Table' - elif objType == TOM.ObjectType.Column: - for r in self.model.Relationships: - if ( - r.FromTable.Name == object.Parent.Name - and r.FromColumn.Name == object.Name - ) or ( - r.ToTable.Name == object.Parent.Name - and r.ToColumn.Name == object.Name - ): - yield r # , 'Column' - - def used_in_levels(self, column: "TOM.Column"): - """ - Shows all levels in which a column is used. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.LevelCollection - All levels in which the column is used. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for l in self.all_levels(): - if ( - l.Parent.Table.Name == column.Parent.Name - and l.Column.Name == column.Name - ): - yield l - - def used_in_hierarchies(self, column: "TOM.Column"): - """ - Shows all hierarchies in which a column is used. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.HierarchyCollection - All hierarchies in which the column is used. 
- """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for l in self.all_levels(): - if ( - l.Parent.Table.Name == column.Parent.Name - and l.Column.Name == column.Name - ): - yield l.Parent - - def used_in_sort_by(self, column: "TOM.Column"): - """ - Shows all columns in which a column is used for sorting. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.ColumnCollection - All columns in which the column is used for sorting. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for c in self.model.Tables[column.Parent.Name].Columns: - if c.SortByColumn == column: - yield c - - def used_in_rls( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], - dependencies: pd.DataFrame, - ): - """ - Identifies the filter expressions which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[dependencies["Object Type"] == "Rows Allowed"] - - if objType == TOM.ObjectType.Table: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Table") - & (df_filt["Referenced Table"] == object.Name) - ] - tbls = fil["Table Name"].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Column") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - cols = fil["Full Object Name"].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Measure") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - meas = fil["Object Name"].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def used_in_data_coverage_definition( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], - dependencies: pd.DataFrame, - ): - """ - Identifies the ... which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
- - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[ - dependencies["Object Type"] == "Data Coverage Definition" - ] - - if objType == TOM.ObjectType.Table: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Table") - & (df_filt["Referenced Table"] == object.Name) - ] - tbls = fil["Table Name"].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Column") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - cols = fil["Full Object Name"].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Measure") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - meas = fil["Object Name"].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def used_in_calc_item( - self, - object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], - dependencies: pd.DataFrame, - ): - """ - Identifies the ... which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[dependencies["Object Type"] == "Calculation Item"] - - if objType == TOM.ObjectType.Table: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Table") - & (df_filt["Referenced Table"] == object.Name) - ] - tbls = fil["Table Name"].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Column") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - cols = fil["Full Object Name"].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[ - (df_filt["Referenced Object Type"] == "Measure") - & (df_filt["Referenced Table"] == object.Parent.Name) - & (df_filt["Referenced Object"] == object.Name) - ] - meas = fil["Object Name"].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def hybrid_tables(self): - """ - Outputs the hybrid tables within a semantic model. - - Parameters - ---------- - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection - All hybrid tables within a semantic model. - """ - - for t in self.model.Tables: - if any(p.Mode == TOM.ModeType.Import for p in t.Partitions): - if any(p.Mode == TOM.ModeType.DirectQuery for p in t.Partitions): - yield t - - def date_tables(self): - """ - Outputs the tables which are marked as date tables within a semantic model. 
- - Parameters - ---------- - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection - All tables marked as date tables within a semantic model. - """ - - for t in self.model.Tables: - if t.DataCategory == "Time": - if any( - c.IsKey and c.DataType == TOM.DataType.DateTime - for c in t.Columns - ): - yield t - - def is_hybrid_table(self, table_name: str): - """ - Identifies if a table is a hybrid table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is a hybrid table. - """ - - isHybridTable = False - - if any( - p.Mode == TOM.ModeType.Import - for p in self.model.Tables[table_name].Partitions - ): - if any( - p.Mode == TOM.ModeType.DirectQuery - for p in self.model.Tables[table_name].Partitions - ): - isHybridTable = True - - return isHybridTable - - def is_date_table(self, table_name: str): - """ - Identifies if a table is marked as a date table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is marked as a date table. - """ - - isDateTable = False - t = self.model.Tables[table_name] - - if t.DataCategory == "Time": - if any( - c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns - ): - isDateTable = True - - return isDateTable - - def mark_as_date_table(self, table_name: str, column_name: str): - """ - Marks a table as a date table. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the date column in the table. - - Returns - ------- - - """ - - t = self.model.Tables[table_name] - c = t.Columns[column_name] - if c.DataType != TOM.DataType.DateTime: - print( - f"{icons.red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type." - ) - return - - daxQuery = f""" - define measure '{table_name}'[test] = - var mn = MIN('{table_name}'[{column_name}]) - var ma = MAX('{table_name}'[{column_name}]) - var x = COUNTROWS(DISTINCT('{table_name}'[{column_name}])) - var y = DATEDIFF(mn, ma, DAY) + 1 - return if(y = x, 1,0) - - EVALUATE - SUMMARIZECOLUMNS( - "1",[test] - ) - """ - df = fabric.evaluate_dax( - dataset=dataset, workspace=workspace, dax_string=daxQuery - ) - value = df["1"].iloc[0] - if value != "1": - print( - f"{icons.red_dot} The '{column_name}' within the '{table_name}' table does not contain contiguous date values." - ) - return - - # Mark as a date table - t.DataCategory = "Time" - c.Columns[column_name].IsKey = True - print( - f"{icons.green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key." - ) - - def has_aggs(self): - """ - Identifies if a semantic model has any aggregations. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has any aggregations. - """ - - hasAggs = False - - for c in self.all_columns(): - if c.AlterateOf is not None: - hasAggs = True - - return hasAggs - - def is_agg_table(self, table_name: str): - """ - Identifies if a table has aggregations. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table has any aggregations. - """ - - t = self.model.Tables[table_name] - - return any(c.AlternateOf is not None for c in t.Columns) - - def has_hybrid_table(self): - """ - Identifies if a semantic model has a hybrid table. 
- - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has a hybrid table. - """ - - hasHybridTable = False - - for t in self.model.Tables: - if self.is_hybrid_table(table_name=t.Name): - hasHybridTable = True - - return hasHybridTable - - def has_date_table(self): - """ - Identifies if a semantic model has a table marked as a date table. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has a table marked as a date table. - """ - - hasDateTable = False - - for t in self.model.Tables: - if self.is_date_table(table_name=t.Name): - hasDateTable = True - - return hasDateTable - - def is_direct_lake(self): - """ - Identifies if a semantic model is in Direct Lake mode. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model is in Direct Lake mode. - """ - - return any( - p.Mode == TOM.ModeType.DirectLake - for t in self.model.Tables - for p in t.Partitions - ) - - def is_field_parameter(self, table_name: str): - """ - Identifies if a table is a field parameter. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is a field parameter. - """ - - t = self.model.Tables[table_name] - - return ( - any( - p.SourceType == TOM.PartitionSourceType.Calculated - and "NAMEOF(" in p.Source.Expression - for p in t.Partitions - ) - and all( - "[Value" in c.SourceColumn - for c in t.Columns - if c.Type != TOM.ColumnType.RowNumber - ) - and t.Columns.Count == 4 - ) - - def is_auto_date_table(self, table_name: str): - """ - Identifies if a table is an auto-date table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is an auto-date table. - """ - - isAutoDate = False - - t = self.model.Tables[table_name] - - if t.Name.startswith("LocalDateTable_") or t.Name.startswith( - "DateTableTemplate_" - ): - if any( - p.SourceType == TOM.PartitionSourceType.Calculated - for p in t.Partitions - ): - isAutoDate = True - - return isAutoDate - - def set_kpi( - self, - measure_name: str, - target: Union[int, float, str], - lower_bound: float, - upper_bound: float, - lower_mid_bound: Optional[float] = None, - upper_mid_bound: Optional[float] = None, - status_type: Optional[str] = None, - status_graphic: Optional[str] = None, - ): - """ - Sets the properties to add/update a KPI for a measure. - - Parameters - ---------- - measure_name : str - Name of the measure. - target : str, int, float - The target for the KPI. This can either be a number or the name of a different measure in the semantic model. - lower_bound: float - The lower bound for the KPI. - upper_bound : float - The upper bound for the KPI. - lower_mid_bound : float, default=None - The lower-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. - upper_mid_bound : float, default=None - The upper-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. - status_type : str, default=None - The status type of the KPI. Options: 'Linear', 'LinearReversed', 'Centered', 'CenteredReversed'. - Defaults to None which resolvs to 'Linear'. - status_graphic : str, default=None - The status graphic for the KPI. - Defaults to 'Three Circles Colored'. - - Returns - ------- - - """ - - # https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md - - if measure_name == target: - print( - f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter." 
- ) - return - - if status_graphic is None: - status_graphic = "Three Circles Colored" - - statusType = ["Linear", "LinearReversed", "Centered", "CenteredReversed"] - status_type = status_type.title().replace(" ", "") - - if status_type is None: - status_type = "Linear" - - if status_type not in statusType: - print( - f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}." - ) - return - - if status_type in ["Linear", "LinearReversed"]: - if upper_bound is not None or lower_mid_bound is not None: - print( - f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None." - ) - return - elif upper_bound <= lower_bound: - print(f"The upper_bound must be greater than the lower_bound.") - return - - if status_type in ["Centered", "CenteredReversed"]: - if upper_mid_bound is None or lower_mid_bound is None: - print( - f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types." - ) - return - elif upper_bound <= upper_mid_bound: - print(f"The upper_bound must be greater than the upper_mid_bound.") - elif upper_mid_bound <= lower_mid_bound: - print( - f"The upper_mid_bound must be greater than the lower_mid_bound." - ) - elif lower_mid_bound <= lower_bound: - print(f"The lower_mid_bound must be greater than the lower_bound.") - - try: - table_name = next( - m.Parent.Name for m in self.all_measures() if m.Name == measure_name - ) - except: - print( - f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'." - ) - return - - graphics = [ - "Cylinder", - "Five Bars Colored", - "Five Boxes Colored", - "Gauge - Ascending", - "Gauge - Descending", - "Road Signs", - "Shapes", - "Standard Arrow", - "Three Circles Colored", - "Three Flags Colored", - "Three Stars Colored", - "Three Symbols Uncircled Colored", - "Traffic Light", - "Traffic Light - Single", - "Variance Arrow", - "Status Arrow - Ascending", - "Status Arrow - Descending", - ] - - if status_graphic not in graphics: - print( - f"The '{status_graphic}' status graphic is not valid. Please choose from these options: {graphics}." - ) - return - - measure_target = True - - try: - float(target) - tgt = str(target) - measure_target = False - except: - try: - tgt = next( - format_dax_object_name(m.Parent.Name, m.Name) - for m in self.all_measures() - if m.Name == target - ) - except: - print( - f"The '{target}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'." 
- ) - - if measure_target: - expr = f"var x = [{measure_name}]/[{target}]\nreturn" - else: - expr = f"var x = [{measure_name}\nreturn" - - if status_type == "Linear": - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_bound},-1,\n\t\tif(x<{upper_bound},0,1)))" - elif status_type == "LinearReversed": - expr = f"{expr}\nif(isblank(x),blank(),\nif(x<{lower_bound},1,\n\t\tif(x<{upper_bound},0,-1)))" - elif status_type == "Centered": - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},-1,0),\n\t\t\tif(x<{upper_mid_bound},1,\n\t\t\t\tif(x<{upper_bound}0,-1))))" - elif status_type == "CenteredReversed": - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},1,0),\n\t\t\tif(x<{upper_mid_bound},-1,\n\t\t\t\tif(x<{upper_bound}0,1))))" - - kpi = TOM.KPI() - kpi.TargetExpression = tgt - kpi.StatusGraphic = status_graphic - kpi.StatusExpression = expr - - ms = self.model.Tables[table_name].Measures[measure_name] - try: - ms.KPI.TargetExpression = tgt - ms.KPI.StatusGraphic = status_graphic - ms.KPI.StatusExpression = expr - except: - ms.KPI = kpi - - def set_aggregations(self, table_name: str, agg_table_name: str): - """ - Sets the aggregations (alternate of) for all the columns in an aggregation table based on a base table. - - Parameters - ---------- - table_name : str - Name of the base table. - agg_table_name : str - Name of the aggregation table. - - Returns - ------- - - """ - - for c in self.model.Tables[agg_table_name].Columns: - - dataType = c.DataType - - if dataType in [ - TOM.DataType.String, - TOM.DataType.Boolean, - TOM.DataType.DateTime, - ]: - sumType = "GroupBy" - else: - sumType = "Sum" - - self.set_alternate_of( - table_name=agg_table_name, - column_name=c.Name, - base_table=table_name, - base_column=c.Name, - summarization_type=sumType, - ) - - def set_is_available_in_mdx( - self, table_name: str, column_name: str, value: Optional[bool] = False - ): - """ - Sets the IsAvailableInMdx property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : bool, default=False - The IsAvailableInMdx property value. - - Returns - ------- - - """ - - self.model.Tables[table_name].Columns[column_name].IsAvailableInMdx = value - - def set_summarize_by( - self, table_name: str, column_name: str, value: Optional[str] = None - ): - """ - Sets the SummarizeBy property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : bool, default=None - The SummarizeBy property value. - Defaults to none which resolves to 'Default'. - `Valid values `_ - - Returns - ------- - - """ - - values = [ - "Default", - "None", - "Sum", - "Min", - "Max", - "Count", - "Average", - "DistinctCount", - ] - # https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby - - if value is None: - value = "Default" - value = ( - value.capitalize() - .replace("Distinctcount", "DistinctCount") - .replace("Avg", "Average") - ) - - if value not in values: - print( - f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}." 
- ) - return - - self.model.Tables[table_name].Columns[column_name].SummarizeBy = ( - System.Enum.Parse(TOM.AggregateFunction, value) - ) - - def set_direct_lake_behavior(self, direct_lake_behavior: str): - """ - Sets the Direct Lake Behavior property for a semantic model. - - Parameters - ---------- - direct_lake_behavior : str - The DirectLakeBehavior property value. - `Valid values `_ - - Returns - ------- - - """ - - direct_lake_behavior = direct_lake_behavior.capitalize() - if direct_lake_behavior.startswith("Auto"): - direct_lake_behavior = "Automatic" - elif ( - direct_lake_behavior.startswith("Directl") - or direct_lake_behavior == "Dl" - ): - direct_lake_behavior = "DirectLakeOnly" - elif ( - direct_lake_behavior.startswith("Directq") - or direct_lake_behavior == "Dq" - ): - direct_lake_behavior = "DirectQueryOnly" - - dlValues = ["Automatic", "DirectLakeOnly", "DirectQueryOnly"] - - if direct_lake_behavior not in dlValues: - print( - f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}." - ) - return - - self.model.DirectLakeBehavior = System.Enum.Parse( - TOM.DirectLakeBehavior, direct_lake_behavior - ) - - def add_table( - self, - name: str, - description: Optional[str] = None, - data_category: Optional[str] = None, - hidden: Optional[bool] = False, - ): - """ - Adds a table to the semantic model. - - Parameters - ---------- - name : str - Name of the table. - description : str, default=None - A description of the table. - data_catgegory : str, default=None - The data category for the table. - hidden : bool, default=False - Whether the table is hidden or visible. - - Returns - ------- - - """ - - t = TOM.Table() - t.Name = name - if description is not None: - t.Description = description - if data_category is not None: - t.DataCategory = data_category - t.Hidden = hidden - self.model.Tables.Add(t) - - def add_calculated_table( - self, - name: str, - expression: str, - description: Optional[str] = None, - data_category: Optional[str] = None, - hidden: Optional[bool] = False, - ): - """ - Adds a calculated table to the semantic model. - - Parameters - ---------- - name : str - Name of the table. - expression : str - The DAX expression for the calculated table. - description : str, default=None - A description of the table. - data_catgegory : str, default=None - The data category for the table. - hidden : bool, default=False - Whether the table is hidden or visible. - - Returns - ------- - - """ - - par = TOM.Partition() - par.Name = name - - parSource = TOM.CalculatedPartitionSource() - parSource.Expression = expression - par.Source = parSource - - t = TOM.Table() - t.Name = name - if description is not None: - t.Description = description - if data_category is not None: - t.DataCategory = data_category - t.Hidden = hidden - t.Partitions.Add(par) - self.model.Tables.Add(t) - - def add_field_parameter(self, table_name: str, objects: List[str]): - """ - Adds a table to the semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - objects : List[str] - The columns/measures to be included in the field parameter. - Columns must be specified as such : 'Table Name'[Column Name]. - Measures may be formatted as '[Measure Name]' or 'Measure Name'. - - Returns - ------- - - """ - - if isinstance(objects, str): - print(f"The 'objects' parameter must be a list of columns/measures.") - return - if len(objects) == 1: - print( - f"There must be more than one object (column/measure) within the objects parameter." 
- ) - return - - expr = "" - i = 0 - for obj in objects: - success = False - for m in self.all_measures(): - if obj == "[" + m.Name + "]" or obj == m.Name: - expr = ( - expr - + "\n\t" - + '("' - + m.Name - + '", NAMEOF([' - + m.Name - + "]), " - + str(i) - + ")," - ) - success = True - for c in self.all_columns(): - fullObjName = format_dax_object_name(c.Parent.Name, c.Name) - if obj == fullObjName or obj == c.Parent.Name + "[" + c.Name + "]": - expr = ( - expr - + "\n\t" - + '("' - + c.Name - + '", NAMEOF(' - + fullObjName - + "), " - + str(i) - + ")," - ) - success = True - if not success: - print( - f"The '{obj}' object was not found in the '{dataset}' semantic model." - ) - return - else: - i += 1 - - expr = "{" + expr.rstrip(",") + "\n}" - - self.add_calculated_table(name=table_name, expression=expr) - - col2 = table_name + " Fields" - col3 = table_name + " Order" - - self.add_calculated_table_column( - table_name=table_name, - column_name=table_name, - source_column="[Value1]", - data_type="String", - hidden=False, - ) - self.add_calculated_table_column( - table_name=table_name, - column_name=col2, - source_column="[Value2]", - data_type="String", - hidden=True, - ) - self.add_calculated_table_column( - table_name=table_name, - column_name=col3, - source_column="[Value3]", - data_type="Int64", - hidden=True, - ) - - self.set_extended_property( - self=self, - object=self.model.Tables[table_name].Columns[col2], - extended_property_type="Json", - name="ParameterMetadata", - value='{"version":3,"kind":2}', - ) - - rcd = TOM.RelatedColumnDetails() - gpc = TOM.GroupByColumn() - gpc.GroupingColumn = self.model.Tables[table_name].Columns[col2] - rcd.GroupByColumns.Add(gpc) - - # Update column properties - self.model.Tables[table_name].Columns[col2].SortByColumn = ( - self.model.Tables[table_name].Columns[col3] - ) - self.model.Tables[table_name].Columns[table_name].RelatedColumnDetails = rcd - - fpAdded.append(table_name) - - def remove_vertipaq_annotations(self): - """ - Removes the annotations set using the [set_vertipaq_annotations] function. - - Parameters - ---------- - - Returns - ------- - - """ - - for t in self.model.Tables: - for a in t.Annotations: - if a.Name.startswith("Vertipaq_"): - self.remove_annotation(object=t, name=a.Name) - for c in t.Columns: - for a in c.Annotations: - if a.Name.startswith("Vertipaq_"): - self.remove_annotation(object=c, name=a.Name) - for h in t.Hierarchies: - for a in h.Annotations: - if a.Name.startswith("Vertipaq_"): - self.remove_annotation(object=h, name=a.Name) - for p in t.Partitions: - for a in p.Annotations: - if a.Name.startswith("Vertipaq_"): - self.remove_annotation(object=p, name=a.Name) - for r in self.model.Relationships: - for a in r.Annotations: - if a.Name.startswith("Veripaq_"): - self.remove_annotation(object=r, name=a.Name) - - def set_vertipaq_annotations(self): - """ - Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model. 
- - Parameters - ---------- - - Returns - ------- - - """ - - dfT = fabric.list_tables( - dataset=dataset, workspace=workspace, extended=True - ) - dfC = fabric.list_columns( - dataset=dataset, workspace=workspace, extended=True - ) - # intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - dfCSum = dfC.groupby(["Table Name"])["Total Size"].sum().reset_index() - dfTable = pd.merge( - dfT[["Name", "Type", "Row Count"]], - dfCSum[["Table Name", "Total Size"]], - left_on="Name", - right_on="Table Name", - how="inner", - ) - dfP = fabric.list_partitions( - dataset=dataset, workspace=workspace, extended=True - ) - dfP["Records per Segment"] = round( - dfP["Record Count"] / dfP["Segment Count"], 2 - ) - dfH = fabric.list_hierarchies( - dataset=dataset, workspace=workspace, extended=True - ) - dfR = list_relationships( - dataset=dataset, workspace=workspace, extended=True - ) - - for t in self.model.Tables: - dfT_filt = dfTable[dfTable["Name"] == t.Name] - rowCount = str(dfT_filt["Row Count"].iloc[0]) - totalSize = str(dfT_filt["Total Size"].iloc[0]) - self.set_annotation(object=t, name="Vertipaq_RowCount", value=rowCount) - self.set_annotation( - object=t, name="Vertipaq_TableSize", value=totalSize - ) - for c in t.Columns: - dfC_filt = dfC[ - (dfC["Table Name"] == t.Name) & (dfC["Column Name"] == c.Name) - ] - totalSize = str(dfC_filt["Total Size"].iloc[0]) - dataSize = str(dfC_filt["Data Size"].iloc[0]) - dictSize = str(dfC_filt["Dictionary Size"].iloc[0]) - hierSize = str(dfC_filt["Hierarchy Size"].iloc[0]) - card = str(dfC_filt["Column Cardinality"].iloc[0]) - self.set_annotation( - object=c, name="Vertipaq_TotalSize", value=totalSize - ) - self.set_annotation( - object=c, name="Vertipaq_DataSize", value=dataSize - ) - self.set_annotation( - object=c, name="Vertipaq_DictionarySize", value=dictSize - ) - self.set_annotation( - object=c, name="Vertipaq_HierarchySize", value=hierSize - ) - self.set_annotation( - object=c, name="Vertipaq_Cardinality", value=card - ) - for p in t.Partitions: - dfP_filt = dfP[ - (dfP["Table Name"] == t.Name) - & (dfP["Partition Name"] == p.Name) - ] - recordCount = str(dfP_filt["Record Count"].iloc[0]) - segmentCount = str(dfP_filt["Segment Count"].iloc[0]) - rpS = str(dfP_filt["Records per Segment"].iloc[0]) - self.set_annotation( - object=p, name="Vertipaq_RecordCount", value=recordCount - ) - self.set_annotation( - object=p, name="Vertipaq_SegmentCount", value=segmentCount - ) - self.set_annotation( - object=p, name="Vertipaq_RecordsPerSegment", value=rpS - ) - for h in t.Hierarchies: - dfH_filt = dfH[ - (dfH["Table Name"] == t.Name) - & (dfH["Hierarchy Name"] == h.Name) - ] - usedSize = str(dfH_filt["Used Size"].iloc[0]) - self.set_annotation( - object=h, name="Vertipaq_UsedSize", value=usedSize - ) - for r in self.model.Relationships: - dfR_filt = dfR[dfR["Relationship Name"] == r.Name] - relSize = str(dfR_filt["Used Size"].iloc[0]) - self.set_annotation(object=r, name="Vertipaq_UsedSize", value=relSize) - + measure_target = True + + try: + float(target) + tgt = str(target) + measure_target = False + except: try: - runId = self.get_annotation_value( - object=self.model, name="Vertipaq_Run" + tgt = next( + format_dax_object_name(m.Parent.Name, m.Name) + for m in self.all_measures() + if m.Name == target ) - runId = str(int(runId) + 1) except: - runId = "1" - self.set_annotation(object=self.model, name="Vertipaq_Run", value=runId) - - def row_count(self, object: Union["TOM.Partition", "TOM.Table"]): - """ - Obtains the row count of a 
table or partition within a semantic model. + print( + f"The '{target}' measure does not exist in the '{self.dataset}' semantic model within the '{self.workspace}'." + ) - Parameters - ---------- - object : TOM Object - The table/partition object within the semantic model. + if measure_target: + expr = f"var x = [{measure_name}]/[{target}]\nreturn" + else: + expr = f"var x = [{measure_name}\nreturn" + + if status_type == "Linear": + expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_bound},-1,\n\t\tif(x<{upper_bound},0,1)))" + elif status_type == "LinearReversed": + expr = f"{expr}\nif(isblank(x),blank(),\nif(x<{lower_bound},1,\n\t\tif(x<{upper_bound},0,-1)))" + elif status_type == "Centered": + expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},-1,0),\n\t\t\tif(x<{upper_mid_bound},1,\n\t\t\t\tif(x<{upper_bound}0,-1))))" + elif status_type == "CenteredReversed": + expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},1,0),\n\t\t\tif(x<{upper_mid_bound},-1,\n\t\t\t\tif(x<{upper_bound}0,1))))" + + kpi = TOM.KPI() + kpi.TargetExpression = tgt + kpi.StatusGraphic = status_graphic + kpi.StatusExpression = expr + + ms = self.model.Tables[table_name].Measures[measure_name] + try: + ms.KPI.TargetExpression = tgt + ms.KPI.StatusGraphic = status_graphic + ms.KPI.StatusExpression = expr + except: + ms.KPI = kpi + + def set_aggregations(self, table_name: str, agg_table_name: str): + """ + Sets the aggregations (alternate of) for all the columns in an aggregation table based on a base table. + + Parameters + ---------- + table_name : str + Name of the base table. + agg_table_name : str + Name of the aggregation table. + + Returns + ------- + + """ + + for c in self.model.Tables[agg_table_name].Columns: + + dataType = c.DataType + + if dataType in [ + TOM.DataType.String, + TOM.DataType.Boolean, + TOM.DataType.DateTime, + ]: + sumType = "GroupBy" + else: + sumType = "Sum" + + self.set_alternate_of( + table_name=agg_table_name, + column_name=c.Name, + base_table=table_name, + base_column=c.Name, + summarization_type=sumType, + ) - Returns - ------- - int - Number of rows within the TOM object. - """ + def set_is_available_in_mdx( + self, table_name: str, column_name: str, value: Optional[bool] = False + ): + """ + Sets the IsAvailableInMdx property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : bool, default=False + The IsAvailableInMdx property value. + """ + + self.model.Tables[table_name].Columns[column_name].IsAvailableInMdx = value + + def set_summarize_by( + self, table_name: str, column_name: str, value: Optional[str] = None + ): + """ + Sets the SummarizeBy property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : bool, default=None + The SummarizeBy property value. + Defaults to none which resolves to 'Default'. 
+ `Valid values `_ + """ + import System + + values = [ + "Default", + "None", + "Sum", + "Min", + "Max", + "Count", + "Average", + "DistinctCount", + ] + # https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby + + if value is None: + value = "Default" + value = ( + value.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + if value not in values: + print( + f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}." + ) + return + + self.model.Tables[table_name].Columns[column_name].SummarizeBy = ( + System.Enum.Parse(TOM.AggregateFunction, value) + ) + + def set_direct_lake_behavior(self, direct_lake_behavior: str): + """ + Sets the Direct Lake Behavior property for a semantic model. + + Parameters + ---------- + direct_lake_behavior : str + The DirectLakeBehavior property value. + `Valid values `_ + """ + import System + + direct_lake_behavior = direct_lake_behavior.capitalize() + if direct_lake_behavior.startswith("Auto"): + direct_lake_behavior = "Automatic" + elif direct_lake_behavior.startswith("Directl") or direct_lake_behavior == "Dl": + direct_lake_behavior = "DirectLakeOnly" + elif direct_lake_behavior.startswith("Directq") or direct_lake_behavior == "Dq": + direct_lake_behavior = "DirectQueryOnly" + + dlValues = ["Automatic", "DirectLakeOnly", "DirectQueryOnly"] + + if direct_lake_behavior not in dlValues: + print( + f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}." + ) + return + + self.model.DirectLakeBehavior = System.Enum.Parse( + TOM.DirectLakeBehavior, direct_lake_behavior + ) + + def add_table( + self, + name: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a table to the semantic model. + + Parameters + ---------- + name : str + Name of the table. + description : str, default=None + A description of the table. + data_catgegory : str, default=None + The data category for the table. + hidden : bool, default=False + Whether the table is hidden or visible. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + t = TOM.Table() + t.Name = name + if description is not None: + t.Description = description + if data_category is not None: + t.DataCategory = data_category + t.Hidden = hidden + self.model.Tables.Add(t) + + def add_calculated_table( + self, + name: str, + expression: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a calculated table to the semantic model. + + Parameters + ---------- + name : str + Name of the table. + expression : str + The DAX expression for the calculated table. + description : str, default=None + A description of the table. + data_catgegory : str, default=None + The data category for the table. + hidden : bool, default=False + Whether the table is hidden or visible. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + par = TOM.Partition() + par.Name = name + + parSource = TOM.CalculatedPartitionSource() + parSource.Expression = expression + par.Source = parSource + + t = TOM.Table() + t.Name = name + if description is not None: + t.Description = description + if data_category is not None: + t.DataCategory = data_category + t.Hidden = hidden + t.Partitions.Add(par) + self.model.Tables.Add(t) + + def add_field_parameter(self, table_name: str, objects: List[str]): + """ + Adds a table to the semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + objects : List[str] + The columns/measures to be included in the field parameter. + Columns must be specified as such : 'Table Name'[Column Name]. + Measures may be formatted as '[Measure Name]' or 'Measure Name'. + """ + + if isinstance(objects, str): + print(f"The 'objects' parameter must be a list of columns/measures.") + return + if len(objects) == 1: + print( + f"There must be more than one object (column/measure) within the objects parameter." + ) + return - objType = object.ObjectType + expr = "" + i = 0 + for obj in objects: + success = False + for m in self.all_measures(): + if obj == "[" + m.Name + "]" or obj == m.Name: + expr = ( + expr + + "\n\t" + + '("' + + m.Name + + '", NAMEOF([' + + m.Name + + "]), " + + str(i) + + ")," + ) + success = True + for c in self.all_columns(): + fullObjName = format_dax_object_name(c.Parent.Name, c.Name) + if obj == fullObjName or obj == c.Parent.Name + "[" + c.Name + "]": + expr = ( + expr + + "\n\t" + + '("' + + c.Name + + '", NAMEOF(' + + fullObjName + + "), " + + str(i) + + ")," + ) + success = True + if not success: + print( + f"The '{obj}' object was not found in the '{self.dataset}' semantic model." + ) + return + else: + i += 1 - if objType == TOM.ObjectType.Table: - result = self.get_annotation_value( - object=object, name="Vertipaq_RowCount" + expr = "{" + expr.rstrip(",") + "\n}" + + self.add_calculated_table(name=table_name, expression=expr) + + col2 = table_name + " Fields" + col3 = table_name + " Order" + + self.add_calculated_table_column( + table_name=table_name, + column_name=table_name, + source_column="[Value1]", + data_type="String", + hidden=False, + ) + self.add_calculated_table_column( + table_name=table_name, + column_name=col2, + source_column="[Value2]", + data_type="String", + hidden=True, + ) + self.add_calculated_table_column( + table_name=table_name, + column_name=col3, + source_column="[Value3]", + data_type="Int64", + hidden=True, + ) + + self.set_extended_property( + self=self, + object=self.model.Tables[table_name].Columns[col2], + extended_property_type="Json", + name="ParameterMetadata", + value='{"version":3,"kind":2}', + ) + + rcd = TOM.RelatedColumnDetails() + gpc = TOM.GroupByColumn() + gpc.GroupingColumn = self.model.Tables[table_name].Columns[col2] + rcd.GroupByColumns.Add(gpc) + + # Update column properties + self.model.Tables[table_name].Columns[col2].SortByColumn = self.model.Tables[ + table_name + ].Columns[col3] + self.model.Tables[table_name].Columns[table_name].RelatedColumnDetails = rcd + + self.tables_added.append(table_name) + + def remove_vertipaq_annotations(self): + """ + Removes the annotations set using the [set_vertipaq_annotations] function. 
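# Field parameters are built by add_field_parameter as a calculated table with
# three columns plus parameter metadata. A sketch with hypothetical measure and
# column names, assuming they already exist in the model:
from sempy_labs._tom import connect_semantic_model

with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    tom.add_field_parameter(
        table_name="Field Parameter",
        objects=["[Sales Amount]", "'Product'[Color]", "'Date'[Calendar Year]"],
    )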
+ """ + + for t in self.model.Tables: + for a in t.Annotations: + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=t, name=a.Name) + for c in t.Columns: + for a in c.Annotations: + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=c, name=a.Name) + for h in t.Hierarchies: + for a in h.Annotations: + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=h, name=a.Name) + for p in t.Partitions: + for a in p.Annotations: + if a.Name.startswith("Vertipaq_"): + self.remove_annotation(object=p, name=a.Name) + for r in self.model.Relationships: + for a in r.Annotations: + if a.Name.startswith("Veripaq_"): + self.remove_annotation(object=r, name=a.Name) + + def set_vertipaq_annotations(self): + """ + Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model. + """ + + dfT = fabric.list_tables( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfC = fabric.list_columns( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + # intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] + dfCSum = dfC.groupby(["Table Name"])["Total Size"].sum().reset_index() + dfTable = pd.merge( + dfT[["Name", "Type", "Row Count"]], + dfCSum[["Table Name", "Total Size"]], + left_on="Name", + right_on="Table Name", + how="inner", + ) + dfP = fabric.list_partitions( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfP["Records per Segment"] = round( + dfP["Record Count"] / dfP["Segment Count"], 2 + ) + dfH = fabric.list_hierarchies( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfR = list_relationships( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + + for t in self.model.Tables: + dfT_filt = dfTable[dfTable["Name"] == t.Name] + rowCount = str(dfT_filt["Row Count"].iloc[0]) + totalSize = str(dfT_filt["Total Size"].iloc[0]) + self.set_annotation(object=t, name="Vertipaq_RowCount", value=rowCount) + self.set_annotation(object=t, name="Vertipaq_TableSize", value=totalSize) + for c in t.Columns: + dfC_filt = dfC[ + (dfC["Table Name"] == t.Name) & (dfC["Column Name"] == c.Name) + ] + totalSize = str(dfC_filt["Total Size"].iloc[0]) + dataSize = str(dfC_filt["Data Size"].iloc[0]) + dictSize = str(dfC_filt["Dictionary Size"].iloc[0]) + hierSize = str(dfC_filt["Hierarchy Size"].iloc[0]) + card = str(dfC_filt["Column Cardinality"].iloc[0]) + self.set_annotation( + object=c, name="Vertipaq_TotalSize", value=totalSize + ) + self.set_annotation(object=c, name="Vertipaq_DataSize", value=dataSize) + self.set_annotation( + object=c, name="Vertipaq_DictionarySize", value=dictSize + ) + self.set_annotation( + object=c, name="Vertipaq_HierarchySize", value=hierSize + ) + self.set_annotation(object=c, name="Vertipaq_Cardinality", value=card) + for p in t.Partitions: + dfP_filt = dfP[ + (dfP["Table Name"] == t.Name) & (dfP["Partition Name"] == p.Name) + ] + recordCount = str(dfP_filt["Record Count"].iloc[0]) + segmentCount = str(dfP_filt["Segment Count"].iloc[0]) + rpS = str(dfP_filt["Records per Segment"].iloc[0]) + self.set_annotation( + object=p, name="Vertipaq_RecordCount", value=recordCount + ) + self.set_annotation( + object=p, name="Vertipaq_SegmentCount", value=segmentCount ) - elif objType == TOM.ObjectType.Partition: - result = self.get_annotation_value( - object=object, name="Vertipaq_RecordCount" + self.set_annotation( + object=p, name="Vertipaq_RecordsPerSegment", value=rpS ) + for h in t.Hierarchies: + dfH_filt = dfH[ + (dfH["Table 
Name"] == t.Name) & (dfH["Hierarchy Name"] == h.Name) + ] + usedSize = str(dfH_filt["Used Size"].iloc[0]) + self.set_annotation(object=h, name="Vertipaq_UsedSize", value=usedSize) + for r in self.model.Relationships: + dfR_filt = dfR[dfR["Relationship Name"] == r.Name] + relSize = str(dfR_filt["Used Size"].iloc[0]) + self.set_annotation(object=r, name="Vertipaq_UsedSize", value=relSize) + + try: + runId = self.get_annotation_value(object=self.model, name="Vertipaq_Run") + runId = str(int(runId) + 1) + except: + runId = "1" + self.set_annotation(object=self.model, name="Vertipaq_Run", value=runId) + + def row_count(self, object: Union["TOM.Partition", "TOM.Table"]): + """ + Obtains the row count of a table or partition within a semantic model. + + Parameters + ---------- + object : TOM Object + The table/partition object within the semantic model. + + Returns + ------- + int + Number of rows within the TOM object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Table: + result = self.get_annotation_value(object=object, name="Vertipaq_RowCount") + elif objType == TOM.ObjectType.Partition: + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordCount" + ) - return int(result) + return int(result) - def records_per_segment(self, object: "TOM.Partition"): - """ - Obtains the records per segment of a partition within a semantic model. + def records_per_segment(self, object: "TOM.Partition"): + """ + Obtains the records per segment of a partition within a semantic model. - Parameters - ---------- - object : TOM Object - The partition object within the semantic model. + Parameters + ---------- + object : TOM Object + The partition object within the semantic model. - Returns - ------- - float - Number of records per segment within the partition. - """ + Returns + ------- + float + Number of records per segment within the partition. + """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = object.ObjectType + objType = object.ObjectType - if objType == TOM.ObjectType.Partition: - result = self.get_annotation_value( - object=object, name="Vertipaq_RecordsPerSegment" - ) + if objType == TOM.ObjectType.Partition: + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordsPerSegment" + ) - return float(result) + return float(result) - def used_size(self, object: Union["TOM.Hierarchy", "TOM.Relationship"]): - """ - Obtains the used size of a hierarchy or relationship within a semantic model. + def used_size(self, object: Union["TOM.Hierarchy", "TOM.Relationship"]): + """ + Obtains the used size of a hierarchy or relationship within a semantic model. - Parameters - ---------- - object : TOM Object - The hierarhcy/relationship object within the semantic model. + Parameters + ---------- + object : TOM Object + The hierarhcy/relationship object within the semantic model. - Returns - ------- - int - Used size of the TOM object. - """ + Returns + ------- + int + Used size of the TOM object. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = object.ObjectType + objType = object.ObjectType - if objType == TOM.ObjectType.Hierarchy: - result = self.get_annotation_value( - object=object, name="Vertipaq_UsedSize" - ) - elif objType == TOM.ObjectType.Relationship: - result = self.get_annotation_value( - object=object, name="Vertipaq_UsedSize" - ) + if objType == TOM.ObjectType.Hierarchy: + result = self.get_annotation_value(object=object, name="Vertipaq_UsedSize") + elif objType == TOM.ObjectType.Relationship: + result = self.get_annotation_value(object=object, name="Vertipaq_UsedSize") - return int(result) + return int(result) - def data_size(self, column: "TOM.Column"): - """ - Obtains the data size of a column within a semantic model. + def data_size(self, column: "TOM.Column"): + """ + Obtains the data size of a column within a semantic model. - Parameters - ---------- - column : TOM Object - The column object within the semantic model. + Parameters + ---------- + column : TOM Object + The column object within the semantic model. - Returns - ------- - int - Data size of the TOM column. - """ + Returns + ------- + int + Data size of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = column.ObjectType + objType = column.ObjectType - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value( - object=column, name="Vertipaq_DataSize" - ) + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value(object=column, name="Vertipaq_DataSize") - return int(result) + return int(result) - def dictionary_size(self, column: "TOM.Column"): - """ - Obtains the dictionary size of a column within a semantic model. + def dictionary_size(self, column: "TOM.Column"): + """ + Obtains the dictionary size of a column within a semantic model. - Parameters - ---------- - column : TOM Object - The column object within the semantic model. + Parameters + ---------- + column : TOM Object + The column object within the semantic model. - Returns - ------- - int - Dictionary size of the TOM column. - """ + Returns + ------- + int + Dictionary size of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = column.ObjectType + objType = column.ObjectType - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value( - object=column, name="Vertipaq_DictionarySize" - ) + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value( + object=column, name="Vertipaq_DictionarySize" + ) - return int(result) + return int(result) - def total_size(self, object: Union["TOM.Table", "TOM.Column"]): - """ - Obtains the data size of a table/column within a semantic model. + def total_size(self, object: Union["TOM.Table", "TOM.Column"]): + """ + Obtains the data size of a table/column within a semantic model. - Parameters - ---------- - object : TOM Object - The table/column object within the semantic model. + Parameters + ---------- + object : TOM Object + The table/column object within the semantic model. - Returns - ------- - int - Total size of the TOM table/column. - """ + Returns + ------- + int + Total size of the TOM table/column. 
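# The Vertipaq helpers work in two steps: set_vertipaq_annotations persists the
# statistics as annotations, then row_count/data_size/dictionary_size/
# cardinality read them back. A sketch with hypothetical names; the readers
# only return values after the annotations have been written.
from sempy_labs._tom import connect_semantic_model

with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    tom.set_vertipaq_annotations()
    sales = tom.model.Tables["Sales"]
    qty = sales.Columns["Quantity"]
    print("rows:", tom.row_count(sales))
    print(
        "data size:", tom.data_size(qty),
        "dictionary size:", tom.dictionary_size(qty),
        "cardinality:", tom.cardinality(qty),
    )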
+ """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = object.ObjectType + objType = object.ObjectType - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value( - object=object, name="Vertipaq_TotalSize" - ) - elif objType == TOM.ObjectType.Table: - result = self.get_annotation_value( - object=object, name="Vertipaq_TotalSize" - ) + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value(object=object, name="Vertipaq_TotalSize") + elif objType == TOM.ObjectType.Table: + result = self.get_annotation_value(object=object, name="Vertipaq_TotalSize") - return int(result) + return int(result) - def cardinality(self, column: "TOM.Column"): - """ - Obtains the cardinality of a column within a semantic model. + def cardinality(self, column: "TOM.Column"): + """ + Obtains the cardinality of a column within a semantic model. - Parameters - ---------- - column : TOM Object - The column object within the semantic model. + Parameters + ---------- + column : TOM Object + The column object within the semantic model. - Returns - ------- - int - Cardinality of the TOM column. - """ + Returns + ------- + int + Cardinality of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM - objType = column.ObjectType + objType = column.ObjectType - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value( - object=column, name="Vertipaq_Cardinality" - ) + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value( + object=column, name="Vertipaq_Cardinality" + ) - return int(result) + return int(result) + + def depends_on(self, object, dependencies: pd.DataFrame): + """ + Obtains the objects on which the specified object depends. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + Objects on which the specified object depends. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + objName = object.Name + objParentName = object.Parent.Name + + if objType == TOM.ObjectType.Table: + objParentName = objName + + fil = dependencies[ + (dependencies["Object Type"] == objType) + & (dependencies["Table Name"] == objParentName) + & (dependencies["Object Name"] == objName) + ] + meas = ( + fil[fil["Referenced Object Type"] == "Measure"]["Referenced Object"] + .unique() + .tolist() + ) + cols = ( + fil[fil["Referenced Object Type"] == "Column"][ + "Referenced Full Object Name" + ] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Referenced Object Type"] == "Table"]["Referenced Table"] + .unique() + .tolist() + ) + for m in self.all_measures(): + if m.Name in meas: + yield m + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + for t in self.model.Tables: + if t.Name in tbls: + yield t + + def referenced_by(self, object, dependencies: pd.DataFrame): + """ + Obtains the objects which reference the specified object. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
+ + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + Objects which reference the specified object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + objName = object.Name + objParentName = object.Parent.Name + + if objType == TOM.ObjectType.Table: + objParentName = objName + + fil = dependencies[ + (dependencies["Referenced Object Type"] == objType) + & (dependencies["Referenced Table"] == objParentName) + & (dependencies["Referenced Object"] == objName) + ] + meas = fil[fil["Object Type"] == "Measure"]["Object Name"].unique().tolist() + cols = ( + fil[fil["Object Type"].isin(["Column", "Calc Column"])]["Full Object Name"] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Object Type"].isin(["Table", "Calc Table"])]["Table Name"] + .unique() + .tolist() + ) + for m in self.all_measures(): + if m.Name in meas: + yield m + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + for t in self.model.Tables: + if t.Name in tbls: + yield t + + def fully_qualified_measures( + self, object: "TOM.Measure", dependencies: pd.DataFrame + ): + """ + Obtains all fully qualified measure references for a given object. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.MeasureCollection + All fully qualified measure references for a given object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for obj in self.depends_on(object=object, dependencies=dependencies): + if obj.ObjectType == TOM.ObjectType.Measure: + if (obj.Parent.Name + obj.Name in object.Expression) or ( + format_dax_object_name(obj.Parent.Name, obj.Name) + in object.Expression + ): + yield obj + + def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): + """ + Obtains all unqualified column references for a given object. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.ColumnCollection + All unqualified column references for a given object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + def create_pattern(a, b): + return r"(? 0: + usingView = True - def referenced_by(self, object, dependencies: pd.DataFrame): - """ - Obtains the objects which reference the specified object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - Objects which reference the specified object. 
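# depends_on, referenced_by, fully_qualified_measures and unqualified_columns
# all consume the calculation-dependency dataframe. A sketch; the import path
# and signature of get_model_calc_dependencies are assumed from its mention in
# the docstrings above, and the measure/table names are hypothetical.
from sempy_labs._model_dependencies import get_model_calc_dependencies
from sempy_labs._tom import connect_semantic_model

dep = get_model_calc_dependencies(dataset="AdventureWorks", workspace="My Workspace")
with connect_semantic_model(dataset="AdventureWorks", workspace="My Workspace") as tom:
    m = tom.model.Tables["Sales"].Measures["Sales Amount"]
    upstream = [o.Name for o in tom.depends_on(object=m, dependencies=dep)]
    downstream = [o.Name for o in tom.referenced_by(object=m, dependencies=dep)]
    print("uses:", upstream, "used by:", downstream)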
- """ - - objType = object.ObjectType - objName = object.Name - objParentName = object.Parent.Name - - if objType == TOM.ObjectType.Table: - objParentName = objName - - fil = dependencies[ - (dependencies["Referenced Object Type"] == objType) - & (dependencies["Referenced Table"] == objParentName) - & (dependencies["Referenced Object"] == objName) - ] - meas = fil[fil["Object Type"] == "Measure"]["Object Name"].unique().tolist() - cols = ( - fil[fil["Object Type"].isin(["Column", "Calc Column"])][ - "Full Object Name" - ] - .unique() - .tolist() - ) - tbls = ( - fil[fil["Object Type"].isin(["Table", "Calc Table"])]["Table Name"] - .unique() - .tolist() - ) - for m in self.all_measures(): - if m.Name in meas: - yield m - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - for t in self.model.Tables: - if t.Name in tbls: - yield t + return usingView - def fully_qualified_measures( - self, object: "TOM.Measure", dependencies: pd.DataFrame - ): - """ - Obtains all fully qualified measure references for a given object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.MeasureCollection - All fully qualified measure references for a given object. - """ - - for obj in self.depends_on(object=object, dependencies=dependencies): - if obj.ObjectType == TOM.ObjectType.Measure: - if (obj.Parent.Name + obj.Name in object.Expression) or ( - format_dax_object_name(obj.Parent.Name, obj.Name) - in object.Expression - ): - yield obj - - def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): - """ - Obtains all unqualified column references for a given object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.ColumnCollection - All unqualified column references for a given object. - """ - - def create_pattern(a, b): - return r"(? 0: - usingView = True - - return usingView - - def has_incremental_refresh_policy(self, table_name: str): - """ - Identifies whether a table has an incremental refresh policy. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - An indicator whether a table has an incremental refresh policy. - """ - - hasRP = False - rp = self.model.Tables[table_name].RefreshPolicy - - if rp is not None: - hasRP = True - - return hasRP - - def show_incremental_refresh_policy(self, table_name: str): - """ - Prints the incremental refresh policy for a table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - - """ - - rp = self.model.Tables[table_name].RefreshPolicy - - if rp is None: - print( - f"The '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace does not have an incremental refresh policy." - ) - else: - print(f"Table Name: {table_name}") - rwGran = str(rp.RollingWindowGranularity).lower() - icGran = str(rp.IncrementalGranularity).lower() - if rp.RollingWindowPeriods > 1: - print( - f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date." 
- ) - else: - print( - f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date." - ) - if rp.IncrementalPeriods > 1: - print( - f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date." - ) - else: - print( - f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date." - ) + def has_incremental_refresh_policy(self, table_name: str): + """ + Identifies whether a table has an incremental refresh policy. - if rp.Mode == TOM.RefreshPolicyMode.Hybrid: - print( - f"{checked} Get the latest data in real time with DirectQuery (Premium only)" - ) - else: - print( - f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)" - ) - if rp.IncrementalPeriodsOffset == -1: - print(f"{checked} Only refresh complete days") - else: - print(f"{unchecked} Only refresh complete days") - if len(rp.PollingExpression) > 0: - pattern = r"\[([^\]]+)\]" - match = re.search(pattern, rp.PollingExpression) - if match: - col = match[0][1:-1] - fullCol = format_dax_object_name(table_name, col) - print( - f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}" - ) - else: - print(f"{unchecked} Detect data changes") - - def update_incremental_refresh_policy( - self, - table_name: str, - incremental_granularity: str, - incremental_periods: int, - rolling_window_granularity: str, - rolling_window_periods: int, - only_refresh_complete_days: Optional[bool] = False, - detect_data_changes_column: Optional[str] = None, - ): - """ - Updates the incremental refresh policy for a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - incremental_granularity : str - Granularity of the (most recent) incremental refresh range. - incremental_periods : int - Number of periods for the incremental refresh range. - rolling_window_granularity : str - Target granularity of the rolling window for the whole semantic model. - rolling_window_periods : int - Number of periods for the rolling window for the whole semantic model. - only_refresh_complete_days : bool, default=False - Lag or leading periods from Now() to the rolling window head. - detect_data_changes_column : str, default=None - The column to use for detecting data changes. - Defaults to None which resolves to not detecting data changes. - - Returns - ------- - - """ - - if not self.has_incremental_refresh_policy(table_name=table_name): - print( - f"The '{table_name}' table does not have an incremental refresh policy." - ) - return + Parameters + ---------- + table_name : str + Name of the table. - incGran = ["Day", "Month", "Quarter", "Year"] + Returns + ------- + bool + An indicator whether a table has an incremental refresh policy. + """ - incremental_granularity = incremental_granularity.capitalize() - rolling_window_granularity = rolling_window_granularity.capitalize() + hasRP = False + rp = self.model.Tables[table_name].RefreshPolicy - if incremental_granularity not in incGran: - print( - f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." - ) - return - if rolling_window_granularity not in incGran: - print( - f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." - ) - return + if rp is not None: + hasRP = True - if rolling_window_periods < 1: - print( - f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." 
- ) - return - if incremental_periods < 1: - print( - f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." - ) - return + return hasRP - t = self.model.Tables[table_name] + def show_incremental_refresh_policy(self, table_name: str): + """ + Prints the incremental refresh policy for a table. - if detect_data_changes_column is not None: - dc = t.Columns[detect_data_changes_column] + Parameters + ---------- + table_name : str + Name of the table. + """ - if dc.DataType != TOM.DataType.DateTime: - print( - f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." - ) - return + rp = self.model.Tables[table_name].RefreshPolicy - rp = TOM.BasicRefreshPolicy() - rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse( - TOM.RefreshGranularityType, incremental_granularity - ) - rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse( - TOM.RefreshGranularityType, rolling_window_granularity + if rp is None: + print( + f"The '{table_name}' table in the '{self.dataset}' semantic model within the '{self.workspace}' workspace does not have an incremental refresh policy." ) - rp.SourceExpression = t.RefreshPolicy.SourceExpression - - if only_refresh_complete_days: - rp.IncrementalPeriodsOffset = -1 - else: - rp.IncrementalPeriodOffset = 0 - - if detect_data_changes_column is not None: - fullDC = format_dax_object_name(table_name, detect_data_changes_column) - ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" - rp.PollingExpression = ddcExpr - else: - rp.PollingExpression = None - - t.RefreshPolicy = rp - - self.show_incremental_refresh_policy(table_name=table_name) - - def add_incremental_refresh_policy( - self, - table_name: str, - column_name: str, - start_date: str, - end_date: str, - incremental_granularity: str, - incremental_periods: int, - rolling_window_granularity: str, - rolling_window_periods: int, - only_refresh_complete_days: Optional[bool] = False, - detect_data_changes_column: Optional[str] = None, - ): - """ - Adds anincremental refresh policy for a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - The DateTime column to be used for the RangeStart and RangeEnd parameters. - start_date : str - The date to be used for the RangeStart parameter. - end_date : str - The date to be used for the RangeEnd parameter. - incremental_granularity : str - Granularity of the (most recent) incremental refresh range. - incremental_periods : int - Number of periods for the incremental refresh range. - rolling_window_granularity : str - Target granularity of the rolling window for the whole semantic model. - rolling_window_periods : int - Number of periods for the rolling window for the whole semantic model. - only_refresh_complete_days : bool, default=False - Lag or leading periods from Now() to the rolling window head. - detect_data_changes_column : str, default=None - The column to use for detecting data changes. - Defaults to None which resolves to not detecting data changes. 
- - Returns - ------- - - """ - - # https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure - - incGran = ["Day", "Month", "Quarter", "Year"] - - incremental_granularity = incremental_granularity.capitalize() - rolling_window_granularity = rolling_window_granularity.capitalize() - - if incremental_granularity not in incGran: + else: + print(f"Table Name: {table_name}") + rwGran = str(rp.RollingWindowGranularity).lower() + icGran = str(rp.IncrementalGranularity).lower() + if rp.RollingWindowPeriods > 1: print( - f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date." ) - return - if rolling_window_granularity not in incGran: + else: print( - f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date." ) - return - - if rolling_window_periods < 1: + if rp.IncrementalPeriods > 1: print( - f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date." ) - return - if incremental_periods < 1: + else: print( - f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date." ) - return - - date_format = "%m/%d/%Y" - - date_obj_start = datetime.strptime(start_date, date_format) - start_year = date_obj_start.year - start_month = date_obj_start.month - start_day = date_obj_start.day - date_obj_end = datetime.strptime(end_date, date_format) - end_year = date_obj_end.year - end_month = date_obj_end.month - end_day = date_obj_end.day - - if date_obj_end <= date_obj_start: + if rp.Mode == TOM.RefreshPolicyMode.Hybrid: print( - f"{icons.red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'." + f"{checked} Get the latest data in real time with DirectQuery (Premium only)" ) - return - - t = self.model.Tables[table_name] - - c = t.Columns[column_name] - fcName = format_dax_object_name(table_name, column_name) - dType = c.DataType - - if dType != TOM.DataType.DateTime: + else: print( - f"{icons.red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type." + f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)" ) - return - - if detect_data_changes_column is not None: - dc = t.Columns[detect_data_changes_column] - dcType = dc.DataType - - if dcType != TOM.DataType.DateTime: + if rp.IncrementalPeriodsOffset == -1: + print(f"{checked} Only refresh complete days") + else: + print(f"{unchecked} Only refresh complete days") + if len(rp.PollingExpression) > 0: + pattern = r"\[([^\]]+)\]" + match = re.search(pattern, rp.PollingExpression) + if match: + col = match[0][1:-1] + fullCol = format_dax_object_name(table_name, col) print( - f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." 
+ f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}" ) - return + else: + print(f"{unchecked} Detect data changes") + + def update_incremental_refresh_policy( + self, + table_name: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): + """ + Updates the incremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + incremental_granularity : str + Granularity of the (most recent) incremental refresh range. + incremental_periods : int + Number of periods for the incremental refresh range. + rolling_window_granularity : str + Target granularity of the rolling window for the whole semantic model. + rolling_window_periods : int + Number of periods for the rolling window for the whole semantic model. + only_refresh_complete_days : bool, default=False + Lag or leading periods from Now() to the rolling window head. + detect_data_changes_column : str, default=None + The column to use for detecting data changes. + Defaults to None which resolves to not detecting data changes. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if not self.has_incremental_refresh_policy(table_name=table_name): + print( + f"The '{table_name}' table does not have an incremental refresh policy." + ) + return - # Start changes: + incGran = ["Day", "Month", "Quarter", "Year"] - # Update partition expression - i = 0 - for p in t.Partitions: - if p.SourceType != TOM.PartitionSourceType.M: - print( - f"{icons.red_dot} Invalid partition source type. Incremental refresh can only be set up if the table's partition is an M-partition." - ) - return - elif i == 0: - text = p.Expression - text = text.rstrip() + incremental_granularity = incremental_granularity.capitalize() + rolling_window_granularity = rolling_window_granularity.capitalize() - ind = text.rfind(" ") + 1 - obj = text[ind:] - pattern = r"in\s*[^ ]*" - matches = list(re.finditer(pattern, text)) + if incremental_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + ) + return + if rolling_window_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + ) + return - if matches: - last_match = matches[-1] - text_before_last_match = text[: last_match.start()] + if rolling_window_periods < 1: + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + ) + return + if incremental_periods < 1: + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." + ) + return - print(text_before_last_match) - else: - print(f"{icons.red_dot} Invalid M-partition expression.") - return + t = self.model.Tables[table_name] - endExpr = f'#"Filtered Rows IR" = Table.SelectRows({obj}, each [{column_name}] >= RangeStart and [{column_name}] <= RangeEnd)\n#"Filtered Rows IR"' - finalExpr = text_before_last_match + endExpr + if detect_data_changes_column is not None: + dc = t.Columns[detect_data_changes_column] - p.Expression = finalExpr - i += 1 + if dc.DataType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." 
+ ) + return - # Add expressions - self.add_expression( - name="RangeStart", - expression=f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + rp = TOM.BasicRefreshPolicy() + rp.IncrementalPeriods = incremental_periods + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) + rp.RollingWindowPeriods = rolling_window_periods + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) + rp.SourceExpression = t.RefreshPolicy.SourceExpression + + if only_refresh_complete_days: + rp.IncrementalPeriodsOffset = -1 + else: + rp.IncrementalPeriodOffset = 0 + + if detect_data_changes_column is not None: + fullDC = format_dax_object_name(table_name, detect_data_changes_column) + ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" + rp.PollingExpression = ddcExpr + else: + rp.PollingExpression = None + + t.RefreshPolicy = rp + + self.show_incremental_refresh_policy(table_name=table_name) + + def add_incremental_refresh_policy( + self, + table_name: str, + column_name: str, + start_date: str, + end_date: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): + """ + Adds anincremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + The DateTime column to be used for the RangeStart and RangeEnd parameters. + start_date : str + The date to be used for the RangeStart parameter. + end_date : str + The date to be used for the RangeEnd parameter. + incremental_granularity : str + Granularity of the (most recent) incremental refresh range. + incremental_periods : int + Number of periods for the incremental refresh range. + rolling_window_granularity : str + Target granularity of the rolling window for the whole semantic model. + rolling_window_periods : int + Number of periods for the rolling window for the whole semantic model. + only_refresh_complete_days : bool, default=False + Lag or leading periods from Now() to the rolling window head. + detect_data_changes_column : str, default=None + The column to use for detecting data changes. + Defaults to None which resolves to not detecting data changes. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + # https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure + + incGran = ["Day", "Month", "Quarter", "Year"] + + incremental_granularity = incremental_granularity.capitalize() + rolling_window_granularity = rolling_window_granularity.capitalize() + + if incremental_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." ) - self.add_expression( - name="RangeEnd", - expression=f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + return + if rolling_window_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. 
Please choose from the following options: {incGran}." ) + return - # Update properties - rp = TOM.BasicRefreshPolicy() - rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse( - TOM.RefreshGranularityType, incremental_granularity + if rolling_window_periods < 1: + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." ) - rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse( - TOM.RefreshGranularityType, rolling_window_granularity + return + if incremental_periods < 1: + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." ) + return - if only_refresh_complete_days: - rp.IncrementalPeriodsOffset = -1 - else: - rp.IncrementalPeriodOffset = 0 - - if detect_data_changes_column is not None: - fullDC = format_dax_object_name(table_name, detect_data_changes_column) - ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" - rp.PollingExpression = ddcExpr + date_format = "%m/%d/%Y" - t.RefreshPolicy = rp + date_obj_start = datetime.strptime(start_date, date_format) + start_year = date_obj_start.year + start_month = date_obj_start.month + start_day = date_obj_start.day - self.show_incremental_refresh_policy(table_name=table_name) + date_obj_end = datetime.strptime(end_date, date_format) + end_year = date_obj_end.year + end_month = date_obj_end.month + end_day = date_obj_end.day - def apply_refresh_policy( - self, - table_name: str, - effective_date: Optional[datetime] = None, - refresh: Optional[bool] = True, - max_parallelism: Optional[int] = 0, - ): - """ - Applies the incremental refresh policy for a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - effective_date : DateTime, default=None - The effective date that is used when calculating the partitioning scheme. - refresh : bool, default=True - An indication if partitions of the table should be refreshed or not; the default behavior is to do the refresh. - max_parallelism : int, default=0 - The degree of parallelism during the refresh execution. - - Returns - ------- - - """ - - self.model.Tables[table_name].ApplyRefreshPolicy( - effectiveDate=effective_date, - refresh=refresh, - maxParallelism=max_parallelism, + if date_obj_end <= date_obj_start: + print( + f"{icons.red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'." ) + return - def set_data_coverage_definition( - self, table_name: str, partition_name: str, expression: str - ): - """ - Sets the data coverage definition for a partition. - - Parameters - ---------- - table_name : str - Name of the table. - partition_name : str - Name of the partition. - expression : str - DAX expression containing the logic for the data coverage definition. + t = self.model.Tables[table_name] - Returns - ------- + c = t.Columns[column_name] + fcName = format_dax_object_name(table_name, column_name) + dType = c.DataType - """ - - doc = "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions" - - t = self.model.Tables[table_name] - p = t.Partitions[partition_name] + if dType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type." 
+ ) + return - ht = self.is_hybrid_table(table_name=table_name) + if detect_data_changes_column is not None: + dc = t.Columns[detect_data_changes_column] + dcType = dc.DataType - if not ht: + if dcType != TOM.DataType.DateTime: print( - f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}." + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." ) return - if p.Mode != TOM.ModeType.DirectQuery: - print( - f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}." - ) - return - - dcd = TOM.DataCoverageDefinition() - dcd.Expression = expression - p.DataCoverageDefinition = dcd - def set_encoding_hint(self, table_name: str, column_name: str, value: str): - """ - Sets the encoding hint for a column. + # Start changes: - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : str - Encoding hint value. - `Valid values `_ - - Returns - ------- - - """ - - values = ["Default", "Hash", "Value"] - value = value.capitalize() - - if value not in values: + # Update partition expression + i = 0 + for p in t.Partitions: + if p.SourceType != TOM.PartitionSourceType.M: print( - f"{icons.red_dot} Invalid encoding hint value. Please choose from these options: {values}." + f"{icons.red_dot} Invalid partition source type. Incremental refresh can only be set up if the table's partition is an M-partition." ) return + elif i == 0: + text = p.Expression + text = text.rstrip() - self.model.Tables[table_name].Columns[column_name].EncodingHint = ( - System.Enum.Parse(TOM.EncodingHintType, value) - ) + ind = text.rfind(" ") + 1 + obj = text[ind:] + pattern = r"in\s*[^ ]*" + matches = list(re.finditer(pattern, text)) - def set_data_type(self, table_name: str, column_name: str, value: str): - """ - Sets the data type for a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : str - The data type. 
- `Valid values `_ - - Returns - ------- - - """ - - values = [ - "Binary", - "Boolean", - "DateTime", - "Decimal", - "Double", - "Int64", - "String", - ] + if matches: + last_match = matches[-1] + text_before_last_match = text[: last_match.start()] - value = value.replace(" ", "").capitalize() - if value == "Datetime": - value = "DateTime" - elif value.startswith("Int"): - value = "Int64" - elif value.startswith("Bool"): - value = "Boolean" + print(text_before_last_match) + else: + print(f"{icons.red_dot} Invalid M-partition expression.") + return - if value not in values: + endExpr = f'#"Filtered Rows IR" = Table.SelectRows({obj}, each [{column_name}] >= RangeStart and [{column_name}] <= RangeEnd)\n#"Filtered Rows IR"' + finalExpr = text_before_last_match + endExpr + + p.Expression = finalExpr + i += 1 + + # Add expressions + self.add_expression( + name="RangeStart", + expression=f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) + self.add_expression( + name="RangeEnd", + expression=f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) + + # Update properties + rp = TOM.BasicRefreshPolicy() + rp.IncrementalPeriods = incremental_periods + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) + rp.RollingWindowPeriods = rolling_window_periods + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) + + if only_refresh_complete_days: + rp.IncrementalPeriodsOffset = -1 + else: + rp.IncrementalPeriodOffset = 0 + + if detect_data_changes_column is not None: + fullDC = format_dax_object_name(table_name, detect_data_changes_column) + ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" + rp.PollingExpression = ddcExpr + + t.RefreshPolicy = rp + + self.show_incremental_refresh_policy(table_name=table_name) + + def apply_refresh_policy( + self, + table_name: str, + effective_date: Optional[datetime] = None, + refresh: Optional[bool] = True, + max_parallelism: Optional[int] = 0, + ): + """ + Applies the incremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + effective_date : DateTime, default=None + The effective date that is used when calculating the partitioning scheme. + refresh : bool, default=True + An indication if partitions of the table should be refreshed or not; the default behavior is to do the refresh. + max_parallelism : int, default=0 + The degree of parallelism during the refresh execution. + """ + + self.model.Tables[table_name].ApplyRefreshPolicy( + effectiveDate=effective_date, + refresh=refresh, + maxParallelism=max_parallelism, + ) + + def set_data_coverage_definition( + self, table_name: str, partition_name: str, expression: str + ): + """ + Sets the data coverage definition for a partition. + + Parameters + ---------- + table_name : str + Name of the table. + partition_name : str + Name of the partition. + expression : str + DAX expression containing the logic for the data coverage definition. 
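# An end-to-end sketch of the incremental refresh helpers above; the table and
# column names are hypothetical, dates use the MM/DD/YYYY format that
# add_incremental_refresh_policy parses, and the table's partition must be an
# M partition.
from sempy_labs._tom import connect_semantic_model

with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    if not tom.has_incremental_refresh_policy(table_name="Sales"):
        tom.add_incremental_refresh_policy(
            table_name="Sales",
            column_name="OrderDate",  # must be a DateTime column
            start_date="01/01/2020",
            end_date="01/01/2024",
            incremental_granularity="Month",
            incremental_periods=12,
            rolling_window_granularity="Year",
            rolling_window_periods=4,
        )
    tom.show_incremental_refresh_policy(table_name="Sales")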
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + doc = "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions" + + t = self.model.Tables[table_name] + p = t.Partitions[partition_name] + + ht = self.is_hybrid_table(table_name=table_name) + + if not ht: + print( + f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}." + ) + return + if p.Mode != TOM.ModeType.DirectQuery: + print( + f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}." + ) + return + + dcd = TOM.DataCoverageDefinition() + dcd.Expression = expression + p.DataCoverageDefinition = dcd + + def set_encoding_hint(self, table_name: str, column_name: str, value: str): + """ + Sets the encoding hint for a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : str + Encoding hint value. + `Valid values `_ + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + values = ["Default", "Hash", "Value"] + value = value.capitalize() + + if value not in values: + print( + f"{icons.red_dot} Invalid encoding hint value. Please choose from these options: {values}." + ) + return + + self.model.Tables[table_name].Columns[column_name].EncodingHint = ( + System.Enum.Parse(TOM.EncodingHintType, value) + ) + + def set_data_type(self, table_name: str, column_name: str, value: str): + """ + Sets the data type for a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : str + The data type. + `Valid values `_ + """ + import System + + values = [ + "Binary", + "Boolean", + "DateTime", + "Decimal", + "Double", + "Int64", + "String", + ] + + value = value.replace(" ", "").capitalize() + if value == "Datetime": + value = "DateTime" + elif value.startswith("Int"): + value = "Int64" + elif value.startswith("Bool"): + value = "Boolean" + + if value not in values: + print( + f"{icons.red_dot} Invalid data type. Please choose from these options: {values}." + ) + return + + self.model.Tables[table_name].Columns[column_name].DataType = System.Enum.Parse( + TOM.DataType, value + ) + + def add_time_intelligence( + self, measure_name: str, date_table: str, time_intel: Union[str, List[str]] + ): + """ + Adds time intelligence measures + + Parameters + ---------- + measure_name : str + Name of the measure + date_table : str + Name of the date table. + time_intel : str, List[str] + Time intelligence measures to create (i.e. MTD, YTD, QTD). + """ + + table_name = None + time_intel_options = ["MTD", "QTD", "YTD"] + + if isinstance(time_intel, str): + time_intel = [time_intel] + + # Validate time intelligence variations + for t in time_intel: + t = t.capitalize() + if t not in [time_intel_options]: print( - f"{icons.red_dot} Invalid data type. Please choose from these options: {values}." + f"The '{t}' time intelligence variation is not supported. Valid options: {time_intel_options}." ) return - self.model.Tables[table_name].Columns[column_name].DataType = ( - System.Enum.Parse(TOM.DataType, value) - ) - - def add_time_intelligence( - self, measure_name: str, date_table: str, time_intel: Union[str, List[str]] - ): - """ - Adds time intelligence measures - - Parameters - ---------- - measure_name : str - Name of the measure - date_table : str - Name of the date table. 
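# The column-level setters above translate friendly strings into the
# corresponding TOM enum values. A short sketch with hypothetical names:
from sempy_labs._tom import connect_semantic_model

with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    tom.set_encoding_hint(table_name="Sales", column_name="Quantity", value="Value")
    tom.set_data_type(table_name="Sales", column_name="Quantity", value="Int64")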
- time_intel : str, List[str] - Time intelligence measures to create (i.e. MTD, YTD, QTD). + # Validate measure and extract table name + for m in self.all_measures(): + if m.Name == measure_name: + table_name = m.Parent.Name - Returns - ------- + if table_name is None: + print( + f"The '{measure_name}' is not a valid measure in the '{self.dataset}' semantic model within the '{self.workspace}' workspace." + ) + return - """ + # Validate date table + if not self.is_date_table(date_table): + print( + f"{icons.red_dot} The '{date_table}' table is not a valid date table in the '{self.dataset}' wemantic model within the '{self.workspace}' workspace." + ) + return + + # Extract date key from date table + for c in self.all_columns(): + if c.Parent.Name == date_table and c.IsKey: + date_key = c.Name + + # Create the new time intelligence measures + for t in time_intel: + if t == "MTD": + expr = f"CALCULATE([{measure_name}],DATES{time_intel}('{date_table}'[{date_key}]))" + new_meas_name = f"{measure_name} {t}" + self.add_measure( + table_name=table_name, + measure_name=new_meas_name, + expression=expr, + ) - table_name = None - time_intel_options = ["MTD", "QTD", "YTD"] + def close(self): + if not self.readonly and self.model is not None: + self.model.SaveChanges() - if isinstance(time_intel, str): - time_intel = [time_intel] + if len(self.tables_added) > 0: + refresh_semantic_model( + dataset=self.dataset, + tables=self.tables_added, + workspace=self.workspace, + ) + self.model = None - # Validate time intelligence variations - for t in time_intel: - t = t.capitalize() - if t not in [time_intel_options]: - print( - f"The '{t}' time intelligence variation is not supported. Valid options: {time_intel_options}." - ) - return + self.tom_server.Dispose() - # Validate measure and extract table name - for m in self.all_measures(): - if m.Name == measure_name: - table_name = m.Parent.Name - if table_name is None: - print( - f"The '{measure_name}' is not a valid measure in the '{dataset}' semantic model within the '{workspace}' workspace." - ) - return +@log +@contextmanager +def connect_semantic_model( + dataset: str, readonly: bool = True, workspace: Optional[str] = None +): + """ + Connects to the Tabular Object Model (TOM) within a semantic model. - # Validate date table - if not self.is_date_table(date_table): - print( - f"{icons.red_dot} The '{date_table}' table is not a valid date table in the '{dataset}' wemantic model within the '{workspace}' workspace." - ) - return + Parameters + ---------- + dataset : str + Name of the semantic model. + readonly: bool, default=True + Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. - # Extract date key from date table - for c in self.all_columns(): - if c.Parent.Name == date_table and c.IsKey: - date_key = c.Name - - # Create the new time intelligence measures - for t in time_intel: - if t == "MTD": - expr = f"CALCULATE([{measure_name}],DATES{time_intel}('{date_table}'[{date_key}]))" - new_meas_name = f"{measure_name} {t}" - self.add_measure( - table_name=table_name, - measure_name=new_meas_name, - expression=expr, - ) + Returns + ------- + str + A connection to the semantic model's Tabular Object Model. 
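# A sketch of add_time_intelligence; the measure and date table names are
# hypothetical, the date table must be marked as a date table with a key
# column, and only the documented MTD/QTD/YTD variations are accepted.
from sempy_labs._tom import connect_semantic_model

with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    tom.add_time_intelligence(
        measure_name="Sales Amount", date_table="Date", time_intel=["MTD", "YTD"]
    )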
+ """ - def close(self): - if not readonly and self.model is not None: - self.model.SaveChanges() + # initialize .NET to make sure System and Microsoft.AnalysisServices.Tabular is defined + sempy.fabric._client._utils._init_analysis_services() - if len(fpAdded) > 0: - refresh_semantic_model( - dataset=dataset, tables=fpAdded, workspace=workspace - ) - self.model = None + if workspace is None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) tw = TOMWrapper(dataset=dataset, workspace=workspace, readonly=readonly) try: diff --git a/sempy_labs/directlake/_directlake_schema_compare.py b/sempy_labs/directlake/_directlake_schema_compare.py index 908a5c41..d34ef558 100644 --- a/sempy_labs/directlake/_directlake_schema_compare.py +++ b/sempy_labs/directlake/_directlake_schema_compare.py @@ -6,6 +6,7 @@ resolve_lakehouse_name, get_direct_lake_sql_endpoint, ) +from IPython.display import display from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns from sempy_labs._list_functions import list_tables from typing import Optional diff --git a/sempy_labs/directlake/_get_directlake_lakehouse.py b/sempy_labs/directlake/_get_directlake_lakehouse.py index 1fa8f778..2ba51cb3 100644 --- a/sempy_labs/directlake/_get_directlake_lakehouse.py +++ b/sempy_labs/directlake/_get_directlake_lakehouse.py @@ -5,8 +5,9 @@ resolve_lakehouse_name, get_direct_lake_sql_endpoint, ) -from typing import List, Optional, Union +from typing import Optional, Tuple from uuid import UUID +from sempy_labs._helper_functions import resolve_workspace_name_and_id def get_direct_lake_lakehouse( @@ -14,7 +15,7 @@ def get_direct_lake_lakehouse( workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None, -): +) -> Tuple[str, UUID]: """ Identifies the lakehouse used by a Direct Lake semantic model. @@ -36,15 +37,11 @@ def get_direct_lake_lakehouse( Returns ------- - str, UUID + str, uuid.UUID The lakehouse name and lakehouse ID. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if lakehouse_workspace is None: lakehouse_workspace = workspace @@ -57,16 +54,16 @@ def get_direct_lake_lakehouse( dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: - print( + raise ValueError( f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode." 
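# connect_semantic_model is the entry point for every TOMWrapper method in this
# patch. A sketch; readonly=False saves changes (and refreshes any tables the
# wrapper added) when the block exits, and the names below are hypothetical.
from sempy_labs._tom import connect_semantic_model

# Read-only inspection: nothing is written back to the service.
with connect_semantic_model(dataset="AdventureWorks", workspace="My Workspace") as tom:
    print([t.Name for t in tom.model.Tables])

# Read/write: SaveChanges() runs in close() when the block exits.
with connect_semantic_model(
    dataset="AdventureWorks", workspace="My Workspace", readonly=False
) as tom:
    tom.add_table(name="Staging", hidden=True)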
) - else: - sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") - dfI_filt = dfI[dfI["Id"] == sqlEndpointId] - lakehouseName = dfI_filt["Display Name"].iloc[0] + sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace) + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[dfI["Id"] == sqlEndpointId] + lakehouseName = dfI_filt["Display Name"].iloc[0] - return lakehouseName, lakehouseId + lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace) + + return lakehouseName, lakehouseId diff --git a/sempy_labs/directlake/_get_shared_expression.py b/sempy_labs/directlake/_get_shared_expression.py index 409ae709..fd1119bf 100644 --- a/sempy_labs/directlake/_get_shared_expression.py +++ b/sempy_labs/directlake/_get_shared_expression.py @@ -1,6 +1,9 @@ import sempy import sempy.fabric as fabric -from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) from sempy_labs._list_functions import list_lakehouses from typing import Optional @@ -27,11 +30,7 @@ def get_shared_expression( Shows the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id) diff --git a/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py index 54a0a1b0..98391029 100644 --- a/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +++ b/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py @@ -1,7 +1,10 @@ import sempy import sempy.fabric as fabric from sempy_labs.directlake._get_shared_expression import get_shared_expression -from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union @@ -36,11 +39,7 @@ def update_direct_lake_model_lakehouse_connection( """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse_workspace == None: lakehouse_workspace = workspace diff --git a/sempy_labs/directlake/_update_directlake_partition_entity.py b/sempy_labs/directlake/_update_directlake_partition_entity.py index 97a44069..d710b146 100644 --- a/sempy_labs/directlake/_update_directlake_partition_entity.py +++ b/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -1,4 +1,3 @@ -import sempy import sempy.fabric as fabric from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union @@ -27,11 +26,7 @@ def update_direct_lake_partition_entity( or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) # Support both str & list types if isinstance(table_name, str): diff --git a/sempy_labs/directlake/_warm_cache.py b/sempy_labs/directlake/_warm_cache.py index 47aa8609..d995b437 100644 --- a/sempy_labs/directlake/_warm_cache.py +++ b/sempy_labs/directlake/_warm_cache.py @@ -7,7 +7,7 @@ from sempy_labs._helper_functions import format_dax_object_name from sempy_labs._refresh_semantic_model import refresh_semantic_model from sempy_labs._model_dependencies import get_measure_dependencies -from typing import List, Optional, Union +from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons @@ -37,14 +37,11 @@ def warm_direct_lake_cache_perspective( Returns ------- - + pandas.DataFrame + Returns a pandas dataframe showing the columns that have been put into memory. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): @@ -148,7 +145,9 @@ def warm_direct_lake_cache_perspective( @log -def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = None): +def warm_direct_lake_cache_isresident( + dataset: str, workspace: Optional[str] = None +) -> pd.DataFrame: """ Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory. @@ -163,15 +162,10 @@ def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = N Returns ------- - + pandas.DataFrame + Returns a pandas dataframe showing the columns that have been put into memory. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): print( diff --git a/sempy_labs/lakehouse/_get_lakehouse_columns.py b/sempy_labs/lakehouse/_get_lakehouse_columns.py index 7ed371d4..cfb3d387 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_columns.py +++ b/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -32,6 +32,7 @@ def get_lakehouse_columns( Shows the tables/columns within a lakehouse and their properties. 
""" from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables + from delta import DeltaTable df = pd.DataFrame( columns=[ @@ -44,11 +45,7 @@ def get_lakehouse_columns( ] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() diff --git a/sempy_labs/lakehouse/_get_lakehouse_tables.py b/sempy_labs/lakehouse/_get_lakehouse_tables.py index e907017d..f80b3e18 100644 --- a/sempy_labs/lakehouse/_get_lakehouse_tables.py +++ b/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -1,10 +1,13 @@ -import sempy import sempy.fabric as fabric import pandas as pd from pyspark.sql import SparkSession import pyarrow.parquet as pq import datetime -from sempy_labs._helper_functions import resolve_lakehouse_id, resolve_lakehouse_name +from sempy_labs._helper_functions import ( + resolve_lakehouse_id, + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) from sempy_labs.directlake._guardrails import ( get_sku_size, get_directlake_guardrails_for_sku, @@ -56,11 +59,7 @@ def get_lakehouse_tables( ] ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() diff --git a/sempy_labs/lakehouse/_lakehouse.py b/sempy_labs/lakehouse/_lakehouse.py index 265e66d9..50e55c40 100644 --- a/sempy_labs/lakehouse/_lakehouse.py +++ b/sempy_labs/lakehouse/_lakehouse.py @@ -47,6 +47,7 @@ def optimize_lakehouse_tables( """ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables + from delta import DeltaTable if workspace == None: workspace_id = fabric.get_workspace_id() diff --git a/sempy_labs/lakehouse/_shortcuts.py b/sempy_labs/lakehouse/_shortcuts.py index bf0a2d13..a02073ed 100644 --- a/sempy_labs/lakehouse/_shortcuts.py +++ b/sempy_labs/lakehouse/_shortcuts.py @@ -1,7 +1,11 @@ import sempy import sempy.fabric as fabric import pandas as pd -from sempy_labs._helper_functions import resolve_lakehouse_name, resolve_lakehouse_id +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_lakehouse_id, + resolve_workspace_name_and_id, +) from typing import List, Optional, Union import sempy_labs._icons as icons @@ -33,10 +37,6 @@ def create_shortcut_onelake( or if no lakehouse attached, resolves to the workspace of the notebook. shortcut_name : str, default=None The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. - - Returns - ------- - """ sourceWorkspaceId = fabric.resolve_workspace_id(source_workspace) @@ -110,10 +110,6 @@ def create_shortcut( The name of the Fabric workspace in which the shortcut will be created. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ source_titles = {"adlsGen2": "ADLS Gen2", "amazonS3": "Amazon S3"} @@ -128,11 +124,7 @@ def create_shortcut( sourceTitle = source_titles[source] - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() @@ -171,7 +163,9 @@ def create_shortcut( ) -def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def list_shortcuts( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +) -> pd.DataFrame: """ Shows all shortcuts which exist in a Fabric lakehouse. @@ -191,11 +185,7 @@ def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = N A pandas dataframe showing all the shortcuts which exist in the specified lakehouse. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() @@ -283,17 +273,9 @@ def delete_shortcut( The name of the Fabric workspace in which lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() diff --git a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py index e1513ab8..854b42a2 100644 --- a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py +++ b/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -50,11 +50,7 @@ def migrate_calc_tables_to_lakehouse( or if no lakehouse attached, resolves to the workspace of the notebook. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if new_dataset_workspace == None: new_dataset_workspace = workspace diff --git a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py index 98db9370..1eb4cc68 100644 --- a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py +++ b/sempy_labs/migration/_migrate_calctables_to_semantic_model.py @@ -44,11 +44,7 @@ def migrate_calc_tables_to_semantic_model( or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if new_dataset_workspace == None: new_dataset_workspace = workspace diff --git a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py index a7400081..5a9721d2 100644 --- a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py +++ b/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py @@ -48,11 +48,7 @@ def migrate_tables_columns_to_semantic_model( """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if new_dataset_workspace == None: new_dataset_workspace = workspace diff --git a/sempy_labs/migration/_migration_validation.py b/sempy_labs/migration/_migration_validation.py index 2df1b4a5..42935d6d 100644 --- a/sempy_labs/migration/_migration_validation.py +++ b/sempy_labs/migration/_migration_validation.py @@ -27,9 +27,6 @@ def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None): A pandas dataframe showing a list of objects in the semantic model """ - if workspace is None: - workspace = fabric.resolve_workspace_name() - df = pd.DataFrame(columns=["Parent Name", "Object Name", "Object Type"]) with connect_semantic_model( dataset=dataset, workspace=workspace, readonly=True @@ -177,7 +174,7 @@ def migration_validation( new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, -): +) -> pd.DataFrame: """ Shows the objects in the original semantic model and whether then were migrated successfully or not. diff --git a/sempy_labs/migration/_refresh_calc_tables.py b/sempy_labs/migration/_refresh_calc_tables.py index 130a6108..467b223a 100644 --- a/sempy_labs/migration/_refresh_calc_tables.py +++ b/sempy_labs/migration/_refresh_calc_tables.py @@ -24,10 +24,6 @@ def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): or if no lakehouse attached, resolves to the workspace of the notebook. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - spark = SparkSession.builder.getOrCreate() start_time = datetime.datetime.now() diff --git a/sempy_labs/report/_generate_report.py b/sempy_labs/report/_generate_report.py index 94139157..a9b560bf 100644 --- a/sempy_labs/report/_generate_report.py +++ b/sempy_labs/report/_generate_report.py @@ -2,7 +2,8 @@ import sempy.fabric as fabric import pandas as pd import json, base64, time -from typing import List, Optional, Union +from typing import Optional +from sempy_labs._helper_functions import resolve_workspace_name_and_id def create_report_from_reportjson( @@ -31,11 +32,7 @@ def create_report_from_reportjson( or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) objectType = "Report" @@ -168,11 +165,7 @@ def update_report_from_reportjson( or if no lakehouse attached, resolves to the workspace of the notebook. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) objectType = "Report" diff --git a/sempy_labs/report/_report_functions.py b/sempy_labs/report/_report_functions.py index 22153f8b..4aa49675 100644 --- a/sempy_labs/report/_report_functions.py +++ b/sempy_labs/report/_report_functions.py @@ -15,8 +15,9 @@ resolve_report_id, resolve_lakehouse_name, language_validate, + resolve_workspace_name_and_id, ) -from typing import List, Optional, Union +from typing import Any, List, Optional, Union from sempy._utils._log import log import sempy_labs._icons as icons @@ -25,7 +26,7 @@ def get_report_json( report: str, workspace: Optional[str] = None, save_to_file_name: Optional[str] = None, -): +) -> Any: """ Gets the report.json file content of a Power BI report. @@ -42,15 +43,11 @@ def get_report_json( Returns ------- - str + Any The report.json file for a given Power BI report. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() @@ -199,11 +196,7 @@ def export_report( ) return - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if isinstance(page_name, str): page_name = [page_name] @@ -239,14 +232,14 @@ def export_report( } export_format = export_format.upper() - if export_format not in validFormats: + + fileExt = validFormats.get(export_format) + if fileExt is None: print( f"{icons.red_dot} The '{export_format}' format is not a valid format for exporting Power BI reports. Please enter a valid format. 
Options: {validFormats}" ) return - fileExt = validFormats.get(export_format) - if file_name == None: file_name = report + fileExt else: @@ -448,11 +441,7 @@ def clone_report( # https://learn.microsoft.com/rest/api/power-bi/reports/clone-report-in-group - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) dfI = fabric.list_items(workspace=workspace, type="Report") dfI_filt = dfI[(dfI["Display Name"] == report)] @@ -545,11 +534,7 @@ def launch_report(report: str, workspace: Optional[str] = None): from .HelperFunctions import resolve_report_id - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) reportId = resolve_report_id(report, workspace) diff --git a/setup.py b/setup.py deleted file mode 100644 index 24a4a224..00000000 --- a/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -import setuptools - -with open("README.md", "r") as fh: - long_description = fh.read() - -setuptools.setup( - name="semantic-link-labs", - version="0.4.1", - author="Microsoft", - author_email="semanticdatascience@service.microsoft.com", - description="", - long_description=long_description, - long_description_content_type="text/markdown", - packages=setuptools.find_packages(), - install_requires=[ - 'semantic-link-sempy>=0.7.5', - 'anytree', - 'powerbiclient', - ], - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.8', -) \ No newline at end of file diff --git a/tests/test_tom.py b/tests/test_tom.py new file mode 100644 index 00000000..bbc130e1 --- /dev/null +++ b/tests/test_tom.py @@ -0,0 +1,31 @@ +import sempy.fabric +from unittest.mock import patch +from sempy_labs import connect_semantic_model + + +@patch("sempy.fabric.resolve_workspace_name") +@patch("sempy.fabric.create_tom_server") +def test_tom_wrapper(create_tom_server, resolve_workspace_name): + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + + # create dummy server, database and model + tom_server = TOM.Server() + + db = TOM.Database() + db.Name = "my_dataset" + db.ID = "my_dataset" + db.Model = TOM.Model() + tom_server.Databases.Add(db) + + create_tom_server.return_value = tom_server + + resolve_workspace_name.return_value = "my_workspace" + + # invoke the wrapper + with connect_semantic_model("my_dataset") as tom: + tom.add_table("my_table") + + # validate the result + assert tom_server.Databases["my_dataset"].Model.Tables["my_table"] is not None From d193cfa9245e49566ee94c57ac46db18a6bc29e9 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 11 Jun 2024 13:11:11 +0000 Subject: [PATCH 20/23] fix toml based build --- pyproject.toml | 2 +- {sempy_labs => src/sempy_labs}/__init__.py | 0 {sempy_labs => src/sempy_labs}/_ai.py | 0 {sempy_labs => src/sempy_labs}/_clear_cache.py | 0 {sempy_labs => src/sempy_labs}/_connections.py | 0 {sempy_labs => src/sempy_labs}/_dax.py | 0 {sempy_labs => src/sempy_labs}/_generate_semantic_model.py | 0 {sempy_labs => src/sempy_labs}/_helper_functions.py | 0 {sempy_labs => src/sempy_labs}/_icons.py | 0 {sempy_labs => 
src/sempy_labs}/_list_functions.py | 0 {sempy_labs => src/sempy_labs}/_model_auto_build.py | 0 {sempy_labs => src/sempy_labs}/_model_bpa.py | 0 {sempy_labs => src/sempy_labs}/_model_dependencies.py | 0 {sempy_labs => src/sempy_labs}/_one_lake_integration.py | 0 {sempy_labs => src/sempy_labs}/_query_scale_out.py | 0 {sempy_labs => src/sempy_labs}/_refresh_semantic_model.py | 0 {sempy_labs => src/sempy_labs}/_tom.py | 0 {sempy_labs => src/sempy_labs}/_translations.py | 0 {sempy_labs => src/sempy_labs}/_vertipaq.py | 0 {sempy_labs => src/sempy_labs}/directlake/__init__.py | 0 .../sempy_labs}/directlake/_directlake_schema_compare.py | 0 .../sempy_labs}/directlake/_directlake_schema_sync.py | 0 {sempy_labs => src/sempy_labs}/directlake/_fallback.py | 0 .../sempy_labs}/directlake/_get_directlake_lakehouse.py | 0 .../sempy_labs}/directlake/_get_shared_expression.py | 0 {sempy_labs => src/sempy_labs}/directlake/_guardrails.py | 0 .../directlake/_list_directlake_model_calc_tables.py | 0 .../directlake/_show_unsupported_directlake_objects.py | 0 .../directlake/_update_directlake_model_lakehouse_connection.py | 0 .../directlake/_update_directlake_partition_entity.py | 0 {sempy_labs => src/sempy_labs}/directlake/_warm_cache.py | 0 {sempy_labs => src/sempy_labs}/lakehouse/__init__.py | 0 .../sempy_labs}/lakehouse/_get_lakehouse_columns.py | 0 .../sempy_labs}/lakehouse/_get_lakehouse_tables.py | 0 {sempy_labs => src/sempy_labs}/lakehouse/_lakehouse.py | 0 {sempy_labs => src/sempy_labs}/lakehouse/_shortcuts.py | 0 {sempy_labs => src/sempy_labs}/migration/__init__.py | 0 {sempy_labs => src/sempy_labs}/migration/_create_pqt_file.py | 0 .../sempy_labs}/migration/_migrate_calctables_to_lakehouse.py | 0 .../migration/_migrate_calctables_to_semantic_model.py | 0 .../migration/_migrate_model_objects_to_semantic_model.py | 0 .../migration/_migrate_tables_columns_to_semantic_model.py | 0 .../sempy_labs}/migration/_migration_validation.py | 0 .../sempy_labs}/migration/_refresh_calc_tables.py | 0 {sempy_labs => src/sempy_labs}/report/__init__.py | 0 {sempy_labs => src/sempy_labs}/report/_generate_report.py | 0 {sempy_labs => src/sempy_labs}/report/_report_functions.py | 0 {sempy_labs => src/sempy_labs}/report/_report_rebind.py | 0 48 files changed, 1 insertion(+), 1 deletion(-) rename {sempy_labs => src/sempy_labs}/__init__.py (100%) rename {sempy_labs => src/sempy_labs}/_ai.py (100%) rename {sempy_labs => src/sempy_labs}/_clear_cache.py (100%) rename {sempy_labs => src/sempy_labs}/_connections.py (100%) rename {sempy_labs => src/sempy_labs}/_dax.py (100%) rename {sempy_labs => src/sempy_labs}/_generate_semantic_model.py (100%) rename {sempy_labs => src/sempy_labs}/_helper_functions.py (100%) rename {sempy_labs => src/sempy_labs}/_icons.py (100%) rename {sempy_labs => src/sempy_labs}/_list_functions.py (100%) rename {sempy_labs => src/sempy_labs}/_model_auto_build.py (100%) rename {sempy_labs => src/sempy_labs}/_model_bpa.py (100%) rename {sempy_labs => src/sempy_labs}/_model_dependencies.py (100%) rename {sempy_labs => src/sempy_labs}/_one_lake_integration.py (100%) rename {sempy_labs => src/sempy_labs}/_query_scale_out.py (100%) rename {sempy_labs => src/sempy_labs}/_refresh_semantic_model.py (100%) rename {sempy_labs => src/sempy_labs}/_tom.py (100%) rename {sempy_labs => src/sempy_labs}/_translations.py (100%) rename {sempy_labs => src/sempy_labs}/_vertipaq.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/__init__.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_directlake_schema_compare.py 
(100%) rename {sempy_labs => src/sempy_labs}/directlake/_directlake_schema_sync.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_fallback.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_get_directlake_lakehouse.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_get_shared_expression.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_guardrails.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_list_directlake_model_calc_tables.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_show_unsupported_directlake_objects.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_update_directlake_model_lakehouse_connection.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_update_directlake_partition_entity.py (100%) rename {sempy_labs => src/sempy_labs}/directlake/_warm_cache.py (100%) rename {sempy_labs => src/sempy_labs}/lakehouse/__init__.py (100%) rename {sempy_labs => src/sempy_labs}/lakehouse/_get_lakehouse_columns.py (100%) rename {sempy_labs => src/sempy_labs}/lakehouse/_get_lakehouse_tables.py (100%) rename {sempy_labs => src/sempy_labs}/lakehouse/_lakehouse.py (100%) rename {sempy_labs => src/sempy_labs}/lakehouse/_shortcuts.py (100%) rename {sempy_labs => src/sempy_labs}/migration/__init__.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_create_pqt_file.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_migrate_calctables_to_lakehouse.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_migrate_calctables_to_semantic_model.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_migrate_model_objects_to_semantic_model.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_migrate_tables_columns_to_semantic_model.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_migration_validation.py (100%) rename {sempy_labs => src/sempy_labs}/migration/_refresh_calc_tables.py (100%) rename {sempy_labs => src/sempy_labs}/report/__init__.py (100%) rename {sempy_labs => src/sempy_labs}/report/_generate_report.py (100%) rename {sempy_labs => src/sempy_labs}/report/_report_functions.py (100%) rename {sempy_labs => src/sempy_labs}/report/_report_rebind.py (100%) diff --git a/pyproject.toml b/pyproject.toml index f44c9dd4..8cdf0c15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ test = [ ] [project.urls] -Repository = "https://github.com/microsoft/semantic-link-labs" +Repository = "https://github.com/microsoft/semantic-link-labs.git" [[tool.mypy.overrides]] module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*" diff --git a/sempy_labs/__init__.py b/src/sempy_labs/__init__.py similarity index 100% rename from sempy_labs/__init__.py rename to src/sempy_labs/__init__.py diff --git a/sempy_labs/_ai.py b/src/sempy_labs/_ai.py similarity index 100% rename from sempy_labs/_ai.py rename to src/sempy_labs/_ai.py diff --git a/sempy_labs/_clear_cache.py b/src/sempy_labs/_clear_cache.py similarity index 100% rename from sempy_labs/_clear_cache.py rename to src/sempy_labs/_clear_cache.py diff --git a/sempy_labs/_connections.py b/src/sempy_labs/_connections.py similarity index 100% rename from sempy_labs/_connections.py rename to src/sempy_labs/_connections.py diff --git a/sempy_labs/_dax.py b/src/sempy_labs/_dax.py similarity index 100% rename from sempy_labs/_dax.py rename to src/sempy_labs/_dax.py diff --git a/sempy_labs/_generate_semantic_model.py b/src/sempy_labs/_generate_semantic_model.py similarity index 100% rename from 
sempy_labs/_generate_semantic_model.py rename to src/sempy_labs/_generate_semantic_model.py diff --git a/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py similarity index 100% rename from sempy_labs/_helper_functions.py rename to src/sempy_labs/_helper_functions.py diff --git a/sempy_labs/_icons.py b/src/sempy_labs/_icons.py similarity index 100% rename from sempy_labs/_icons.py rename to src/sempy_labs/_icons.py diff --git a/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py similarity index 100% rename from sempy_labs/_list_functions.py rename to src/sempy_labs/_list_functions.py diff --git a/sempy_labs/_model_auto_build.py b/src/sempy_labs/_model_auto_build.py similarity index 100% rename from sempy_labs/_model_auto_build.py rename to src/sempy_labs/_model_auto_build.py diff --git a/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py similarity index 100% rename from sempy_labs/_model_bpa.py rename to src/sempy_labs/_model_bpa.py diff --git a/sempy_labs/_model_dependencies.py b/src/sempy_labs/_model_dependencies.py similarity index 100% rename from sempy_labs/_model_dependencies.py rename to src/sempy_labs/_model_dependencies.py diff --git a/sempy_labs/_one_lake_integration.py b/src/sempy_labs/_one_lake_integration.py similarity index 100% rename from sempy_labs/_one_lake_integration.py rename to src/sempy_labs/_one_lake_integration.py diff --git a/sempy_labs/_query_scale_out.py b/src/sempy_labs/_query_scale_out.py similarity index 100% rename from sempy_labs/_query_scale_out.py rename to src/sempy_labs/_query_scale_out.py diff --git a/sempy_labs/_refresh_semantic_model.py b/src/sempy_labs/_refresh_semantic_model.py similarity index 100% rename from sempy_labs/_refresh_semantic_model.py rename to src/sempy_labs/_refresh_semantic_model.py diff --git a/sempy_labs/_tom.py b/src/sempy_labs/_tom.py similarity index 100% rename from sempy_labs/_tom.py rename to src/sempy_labs/_tom.py diff --git a/sempy_labs/_translations.py b/src/sempy_labs/_translations.py similarity index 100% rename from sempy_labs/_translations.py rename to src/sempy_labs/_translations.py diff --git a/sempy_labs/_vertipaq.py b/src/sempy_labs/_vertipaq.py similarity index 100% rename from sempy_labs/_vertipaq.py rename to src/sempy_labs/_vertipaq.py diff --git a/sempy_labs/directlake/__init__.py b/src/sempy_labs/directlake/__init__.py similarity index 100% rename from sempy_labs/directlake/__init__.py rename to src/sempy_labs/directlake/__init__.py diff --git a/sempy_labs/directlake/_directlake_schema_compare.py b/src/sempy_labs/directlake/_directlake_schema_compare.py similarity index 100% rename from sempy_labs/directlake/_directlake_schema_compare.py rename to src/sempy_labs/directlake/_directlake_schema_compare.py diff --git a/sempy_labs/directlake/_directlake_schema_sync.py b/src/sempy_labs/directlake/_directlake_schema_sync.py similarity index 100% rename from sempy_labs/directlake/_directlake_schema_sync.py rename to src/sempy_labs/directlake/_directlake_schema_sync.py diff --git a/sempy_labs/directlake/_fallback.py b/src/sempy_labs/directlake/_fallback.py similarity index 100% rename from sempy_labs/directlake/_fallback.py rename to src/sempy_labs/directlake/_fallback.py diff --git a/sempy_labs/directlake/_get_directlake_lakehouse.py b/src/sempy_labs/directlake/_get_directlake_lakehouse.py similarity index 100% rename from sempy_labs/directlake/_get_directlake_lakehouse.py rename to src/sempy_labs/directlake/_get_directlake_lakehouse.py diff --git 
a/sempy_labs/directlake/_get_shared_expression.py b/src/sempy_labs/directlake/_get_shared_expression.py similarity index 100% rename from sempy_labs/directlake/_get_shared_expression.py rename to src/sempy_labs/directlake/_get_shared_expression.py diff --git a/sempy_labs/directlake/_guardrails.py b/src/sempy_labs/directlake/_guardrails.py similarity index 100% rename from sempy_labs/directlake/_guardrails.py rename to src/sempy_labs/directlake/_guardrails.py diff --git a/sempy_labs/directlake/_list_directlake_model_calc_tables.py b/src/sempy_labs/directlake/_list_directlake_model_calc_tables.py similarity index 100% rename from sempy_labs/directlake/_list_directlake_model_calc_tables.py rename to src/sempy_labs/directlake/_list_directlake_model_calc_tables.py diff --git a/sempy_labs/directlake/_show_unsupported_directlake_objects.py b/src/sempy_labs/directlake/_show_unsupported_directlake_objects.py similarity index 100% rename from sempy_labs/directlake/_show_unsupported_directlake_objects.py rename to src/sempy_labs/directlake/_show_unsupported_directlake_objects.py diff --git a/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py b/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py similarity index 100% rename from sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py rename to src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py diff --git a/sempy_labs/directlake/_update_directlake_partition_entity.py b/src/sempy_labs/directlake/_update_directlake_partition_entity.py similarity index 100% rename from sempy_labs/directlake/_update_directlake_partition_entity.py rename to src/sempy_labs/directlake/_update_directlake_partition_entity.py diff --git a/sempy_labs/directlake/_warm_cache.py b/src/sempy_labs/directlake/_warm_cache.py similarity index 100% rename from sempy_labs/directlake/_warm_cache.py rename to src/sempy_labs/directlake/_warm_cache.py diff --git a/sempy_labs/lakehouse/__init__.py b/src/sempy_labs/lakehouse/__init__.py similarity index 100% rename from sempy_labs/lakehouse/__init__.py rename to src/sempy_labs/lakehouse/__init__.py diff --git a/sempy_labs/lakehouse/_get_lakehouse_columns.py b/src/sempy_labs/lakehouse/_get_lakehouse_columns.py similarity index 100% rename from sempy_labs/lakehouse/_get_lakehouse_columns.py rename to src/sempy_labs/lakehouse/_get_lakehouse_columns.py diff --git a/sempy_labs/lakehouse/_get_lakehouse_tables.py b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py similarity index 100% rename from sempy_labs/lakehouse/_get_lakehouse_tables.py rename to src/sempy_labs/lakehouse/_get_lakehouse_tables.py diff --git a/sempy_labs/lakehouse/_lakehouse.py b/src/sempy_labs/lakehouse/_lakehouse.py similarity index 100% rename from sempy_labs/lakehouse/_lakehouse.py rename to src/sempy_labs/lakehouse/_lakehouse.py diff --git a/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py similarity index 100% rename from sempy_labs/lakehouse/_shortcuts.py rename to src/sempy_labs/lakehouse/_shortcuts.py diff --git a/sempy_labs/migration/__init__.py b/src/sempy_labs/migration/__init__.py similarity index 100% rename from sempy_labs/migration/__init__.py rename to src/sempy_labs/migration/__init__.py diff --git a/sempy_labs/migration/_create_pqt_file.py b/src/sempy_labs/migration/_create_pqt_file.py similarity index 100% rename from sempy_labs/migration/_create_pqt_file.py rename to src/sempy_labs/migration/_create_pqt_file.py diff --git 
a/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py similarity index 100% rename from sempy_labs/migration/_migrate_calctables_to_lakehouse.py rename to src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py diff --git a/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py similarity index 100% rename from sempy_labs/migration/_migrate_calctables_to_semantic_model.py rename to src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py diff --git a/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py b/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py similarity index 100% rename from sempy_labs/migration/_migrate_model_objects_to_semantic_model.py rename to src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py diff --git a/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py similarity index 100% rename from sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py rename to src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py diff --git a/sempy_labs/migration/_migration_validation.py b/src/sempy_labs/migration/_migration_validation.py similarity index 100% rename from sempy_labs/migration/_migration_validation.py rename to src/sempy_labs/migration/_migration_validation.py diff --git a/sempy_labs/migration/_refresh_calc_tables.py b/src/sempy_labs/migration/_refresh_calc_tables.py similarity index 100% rename from sempy_labs/migration/_refresh_calc_tables.py rename to src/sempy_labs/migration/_refresh_calc_tables.py diff --git a/sempy_labs/report/__init__.py b/src/sempy_labs/report/__init__.py similarity index 100% rename from sempy_labs/report/__init__.py rename to src/sempy_labs/report/__init__.py diff --git a/sempy_labs/report/_generate_report.py b/src/sempy_labs/report/_generate_report.py similarity index 100% rename from sempy_labs/report/_generate_report.py rename to src/sempy_labs/report/_generate_report.py diff --git a/sempy_labs/report/_report_functions.py b/src/sempy_labs/report/_report_functions.py similarity index 100% rename from sempy_labs/report/_report_functions.py rename to src/sempy_labs/report/_report_functions.py diff --git a/sempy_labs/report/_report_rebind.py b/src/sempy_labs/report/_report_rebind.py similarity index 100% rename from sempy_labs/report/_report_rebind.py rename to src/sempy_labs/report/_report_rebind.py From e83bfcb4572b64a91e865c81dfd76fd253ff7674 Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 11 Jun 2024 13:25:33 +0000 Subject: [PATCH 21/23] fix source path --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 7918a843..6ef59f26 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -16,7 +16,7 @@ build: # golang: "1.19" jobs: pre_build: - - sphinx-apidoc -f -o docs/source sempy_labs/ + - sphinx-apidoc -f -o docs/source src/sempy_labs/ # Build documentation in the "docs/" directory with Sphinx sphinx: From fb20ea3e1678c0f5c2d038ab1978fe09f421932e Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 11 Jun 2024 13:43:26 +0000 Subject: [PATCH 22/23] fix source path --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index c55d412f..8001986a 100644 --- 
a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,6 +1,6 @@ import os import sys -sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath('../../src/')) # Configuration file for the Sphinx documentation builder. # From a77ad3fc041f49c332f5d8dec4ff0bb2b7cdedff Mon Sep 17 00:00:00 2001 From: Markus Cozowicz Date: Tue, 11 Jun 2024 14:20:18 +0000 Subject: [PATCH 23/23] fix duplicate link labels --- src/sempy_labs/_tom.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sempy_labs/_tom.py b/src/sempy_labs/_tom.py index 8f3d1fcf..c5a163ce 100644 --- a/src/sempy_labs/_tom.py +++ b/src/sempy_labs/_tom.py @@ -609,7 +609,7 @@ def set_ols( Name of the column. permission : str The object level security permission for the column. - `Valid values `_ + `Permission valid values `_ """ import Microsoft.AnalysisServices.Tabular as TOM import System @@ -995,7 +995,7 @@ def set_alternate_of( Name of the column. summarization_type : str The summarization type for the column. - `Valid values `_ + `Summarization valid values `_ base_table : str Name of the base table for aggregation. base_column : str @@ -1180,7 +1180,7 @@ def set_extended_property( An object (i.e. table/column/measure) within a semantic model. extended_property_type : str The extended property type. - `Valid values `_ + `Extended property valid values `_ name : str Name of the extended property. value : str @@ -2396,7 +2396,7 @@ def set_summarize_by( value : bool, default=None The SummarizeBy property value. Defaults to none which resolves to 'Default'. - `Valid values `_ + `Aggregate valid values `_ """ import System @@ -2438,7 +2438,7 @@ def set_direct_lake_behavior(self, direct_lake_behavior: str): ---------- direct_lake_behavior : str The DirectLakeBehavior property value. - `Valid values `_ + `DirectLakeBehavior valid values `_ """ import System @@ -3585,7 +3585,7 @@ def set_encoding_hint(self, table_name: str, column_name: str, value: str): Name of the column. value : str Encoding hint value. - `Valid values `_ + `Encoding hint valid values `_ """ import Microsoft.AnalysisServices.Tabular as TOM import System @@ -3615,7 +3615,7 @@ def set_data_type(self, table_name: str, column_name: str, value: str): Name of the column. value : str The data type. - `Valid values `_ + `Data type valid values `_ """ import System