diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 72b028e7..6ef59f26 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -14,6 +14,9 @@ build: # nodejs: "19" # rust: "1.64" # golang: "1.19" + jobs: + pre_build: + - sphinx-apidoc -f -o docs/source src/sempy_labs/ # Build documentation in the "docs/" directory with Sphinx sphinx: diff --git a/README.md b/README.md index 1a119284..1c5c9649 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # semantic-link-labs [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs) +[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.4.1&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs) + This is a python library intended to be used in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). This library was originally intended to contain functions used for [migrating semantic models to Direct Lake mode](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration-1). However, it quickly became apparent that functions within such a library could support many other useful activities in the realm of semantic models, reports, lakehouses and really anything Fabric-related. As such, this library contains a variety of functions ranging from running [Vertipaq Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#vertipaq_analyzer) or the [Best Practice Analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#run_model_bpa) against a semantic model to seeing if any [lakehouse tables hit Direct Lake guardrails](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#get_lakehouse_tables) or accessing the [Tabular Object Model](https://github.com/microsoft/semantic-link-labs/#tabular-object-model-tom) and more! @@ -214,3821 +218,6 @@ An even better way to ensure the semantic-link-labs library is available in your * [is_field_parameter](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#is_field_parameter) -# Functions -## cancel_dataset_refresh -#### Cancels the refresh of a semantic model which was executed via the [Enhanced Refresh API](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh). -```python -import semantic-link-labs as labs -labs.cancel_dataset_refresh( - dataset = 'MyReport', - #request_id = None, - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **request_id** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The [request id](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh#response-properties) of a semantic model refresh. Defaults to finding the latest active refresh of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## check_fallback_reason -#### Shows the reason a table in a Direct Lake semantic model would fallback to Direct Query. 
-> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.check_fallback_reason( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> Pandas dataframe showing the tables in the semantic model and their fallback reason. - ---- -## clear_cache -#### Clears the cache of a semantic model. -```python -import semantic-link-labs as labs -labs.clear_cache( - dataset = 'AdventureWorks', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## clone_report -#### Makes a clone of a Power BI report -```python -import semantic-link-labs as labs -labs.clone_report( - report = 'MyReport', - cloned_report = 'MyNewReport', - #workspace = None, - #target_workspace = None, - #target_dataset = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report to be cloned. -> -> **cloned_report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the new report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the original report resides. -> -> **target_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the new report will reside. Defaults to using the workspace in which the original report resides. -> -> **target_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The semantic model from which the new report will be connected. Defaults to using the semantic model used by the original report. -### Returns -> A printout stating the success/failure of the operation. - ---- -## control_fallback -#### Set the DirectLakeBehavior for a semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.control_fallback( - dataset = 'AdventureWorks', - direct_lake_behavior = 'DirectLakeOnly', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **direct_lake_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Setting for Direct Lake Behavior. Options: ('Automatic', 'DirectLakeOnly', 'DirectQueryOnly'). -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_blank_semantic_model -#### Creates a new blank semantic model (no tables/columns etc.). 
-```python -import semantic-link-labs as labs -labs.create_blank_semantic_model( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **compatibility_level** [int](https://docs.python.org/3/library/functions.html#int) -> ->> Optional; Setting for the compatibility level of the semantic model. Default value: 1605. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_pqt_file -#### Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. -```python -import semantic-link-labs as labs -labs.create_pqt_file( - dataset = 'AdventureWorks', - #file_name = 'PowerQueryTemplate', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **file_name** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Optional; TName of the Power Query Template (.pqt) file to be created. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_report_from_reportjson -#### Creates a report based on a report.json file (and an optional themes.json file). -```python -import semantic-link-labs as labs -labs.create_report_from_reportjson( - report = 'MyReport', - dataset = 'AdventureWorks', - report_json = '', - #theme_json = '', - #workspace = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to connect to the report. -> -> **report_json** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The report.json file to be used to create the report. -> -> **theme_json** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The theme.json file to be used for the theme of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_semantic_model_from_bim -#### Creates a new semantic model based on a Model.bim file. -```python -import semantic-link-labs as labs -labs.create_semantic_model_from_bim( - dataset = 'AdventureWorks', - bim_file = '', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **bim_file** [Dict](https://docs.python.org/3/library/typing.html#typing.Dict) or [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The model.bim file to be used to create the semantic model. 
-> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_shortcut_onelake -#### Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to a delta table in OneLake. -```python -import semantic-link-labs as labs -labs.create_shortcut_onelake( - table_name = 'DimCalendar', - source_lakehouse = 'Lakehouse1', - source_workspace = 'Workspace1', - destination_lakehouse = 'Lakehouse2', - #destination_workspace = '', - shortcut_name = 'Calendar' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The table name for which a shortcut will be created. -> -> **source_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The lakehouse in which the table resides. -> -> **sourceWorkspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The workspace where the source lakehouse resides. -> -> **destination_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The lakehouse where the shortcut will be created. -> -> **destination_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the shortcut will be created. Defaults to the 'sourceWorkspaceName' parameter value. -> -> **shortcut_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the shortcut 'table' to be created. This defaults to the 'tableName' parameter value. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## create_warehouse -#### Creates a warehouse in Fabric. -```python -import semantic-link-labs as labs -labs.create_warehouse( - warehouse = 'MyWarehouse', - workspace = None - ) -``` -### Parameters -> **warehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the warehouse. -> -> **description** [str](https://docs.python.org/3/library/functions.html#str) -> ->> Optional; Description of the warehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the warehouse will reside. -### Returns -> A printout stating the success/failure of the operation. - ---- -## delete_shortcut -#### Deletes a [OneLake shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts). -```python -import semantic-link-labs as labs -labs.delete_shortcut( - shortcut_name = 'DimCalendar', - lakehouse = 'Lakehouse1', - workspace = 'Workspace1' - ) -``` -### Parameters -> **shortcut_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the OneLake shortcut to delete. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse in which the shortcut resides. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -> -### Returns -> A printout stating the success/failure of the operation. - ---- -## direct_lake_schema_compare -#### Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. 
-```python -import semantic-link-labs as labs -labs.direct_lake_schema_compare( - dataset = 'AdventureWorks', - workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse resides. -> -### Returns -> Shows tables/columns which exist in the semantic model but do not exist in the corresponding lakehouse. - ---- -## direct_lake_schema_sync -#### Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model). -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.direct_lake_schema_sync( - dataset = 'AdvWorks', - add_to_model = True, - #workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **add_to_model** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds columns which exist in the lakehouse but do not exist in the semantic model. No new tables are added. Default value: False. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse resides. -> -### Returns -> A list of columns which exist in the lakehouse but not in the Direct Lake semantic model. If 'add_to_model' is set to True, a printout stating the success/failure of the operation is returned. - ---- -## export_model_to_onelake -#### Exports a semantic model's tables to delta tables in the lakehouse. Creates shortcuts to the tables if a lakehouse is specified. -> [!IMPORTANT] -> This function requires: -> -> [XMLA read/write](https://learn.microsoft.com/power-bi/enterprise/service-premium-connect-tools#enable-xmla-read-write) to be enabled on the Fabric capacity. -> -> [OneLake Integration](https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration) feature to be enabled within the semantic model settings. -```python -import semantic-link-labs as labs -labs.export_model_to_onelake( - dataset = 'AdventureWorks', - workspace = None, - destination_lakehouse = 'Lakehouse2', - destination_workspace = 'Workspace2' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. 
->
-> **destination_lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created.
->
-> **destination_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace in which the lakehouse resides.
->
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## export_report
-#### Exports a Power BI report to a file in your lakehouse.
-```python
-import semantic-link-labs as labs
-labs.export_report(
-    report = 'AdventureWorks',
-    export_format = 'PDF',
-    #file_name = None,
-    #bookmark_name = None,
-    #page_name = None,
-    #visual_name = None,
-    #workspace = None
-    )
-```
-```python
-import semantic-link-labs as labs
-labs.export_report(
-    report = 'AdventureWorks',
-    export_format = 'PDF',
-    #file_name = 'Exports\MyReport',
-    #bookmark_name = None,
-    #page_name = 'ReportSection293847182375',
-    #visual_name = None,
-    #workspace = None
-    )
-```
-```python
-import semantic-link-labs as labs
-labs.export_report(
-    report = 'AdventureWorks',
-    export_format = 'PDF',
-    #page_name = 'ReportSection293847182375',
-    #report_filter = "'Product Category'[Color] in ('Blue', 'Orange') and 'Calendar'[CalendarYear] <= 2020",
-    #workspace = None
-    )
-```
-```python
-import semantic-link-labs as labs
-labs.export_report(
-    report = 'AdventureWorks',
-    export_format = 'PDF',
-    #page_name = ['ReportSection293847182375', 'ReportSection4818372483347'],
-    #workspace = None
-    )
-```
-```python
-import semantic-link-labs as labs
-labs.export_report(
-    report = 'AdventureWorks',
-    export_format = 'PDF',
-    #page_name = ['ReportSection293847182375', 'ReportSection4818372483347'],
-    #visual_name = ['d84793724739', 'v834729234723847'],
-    #workspace = None
-    )
-```
-### Parameters
-> **report** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the report.
->
-> **export_format** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The format in which to export the report. See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'.
->
-> **file_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The name of the file to be saved within the lakehouse. Do **not** include the file extension. Defaults to the report parameter value.
->
-> **bookmark_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The name (GUID) of a bookmark within the report.
->
-> **page_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The name (GUID) of the report page.
->
-> **visual_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter.
->
-> **report_filter** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the report resides.
->
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## generate_embedded_filter
-#### Converts a filter condition into the syntax used by [embedded URL filters](https://learn.microsoft.com/power-bi/collaborate-share/service-url-filters).
-```python
-import semantic-link-labs as labs
-labs.generate_embedded_filter(
-    filter = "'Product'[Product Category] = 'Bikes' and 'Geography'[Country Code] in (3, 6, 10)"
-    )
-```
-### Parameters
-> **filter** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The filter condition to be converted.
-### Returns
-> A string containing the filter converted into an [embedded filter](https://learn.microsoft.com/power-bi/collaborate-share/service-url-filters).
-
----
-## get_direct_lake_guardrails
-#### Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query based on Microsoft's online documentation.
-```python
-import semantic-link-labs as labs
-labs.get_direct_lake_guardrails()
-```
-### Parameters
-None
-### Returns
-> A table showing the Direct Lake guardrails by SKU.
-
----
-## get_directlake_guardrails_for_sku
-#### Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity.
-*Use the result of the 'get_sku_size' function as an input for this function's sku_size parameter (a combined sketch is shown after the get_direct_lake_sql_endpoint section below).*
-```python
-import semantic-link-labs as labs
-labs.get_directlake_guardrails_for_sku(
-    sku_size = ''
-    )
-```
-### Parameters
-> **sku_size** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; SKU size of a workspace/capacity.
-### Returns
-> A table showing the Direct Lake guardrails for the given SKU.
-
----
-## get_direct_lake_lakehouse
-#### Identifies the lakehouse used by a Direct Lake semantic model.
-> [!NOTE]
-> This function is only relevant to semantic models in Direct Lake mode.
-```python
-import semantic-link-labs as labs
-labs.get_direct_lake_lakehouse(
-    dataset = 'AdventureWorks',
-    #workspace = '',
-    #lakehouse = '',
-    #lakehouse_workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Name of the lakehouse used by the semantic model.
->
-> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-
----
-## get_direct_lake_sql_endpoint
-#### Identifies the SQL endpoint used by a Direct Lake semantic model.
-> [!NOTE]
-> This function is only relevant to semantic models in Direct Lake mode.
-```python
-import semantic-link-labs as labs
-labs.get_direct_lake_sql_endpoint(
-    dataset = 'AdventureWorks',
-    #workspace = None
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A string containing the SQL Endpoint ID for a Direct Lake semantic model.
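-
-*A minimal sketch of chaining get_sku_size into get_directlake_guardrails_for_sku, as referenced in the get_directlake_guardrails_for_sku section above. The workspace name is illustrative; note that the library installs as 'semantic-link-labs' but imports as sempy_labs.*
-```python
-import sempy_labs as labs  # installed as 'semantic-link-labs'; the importable package is sempy_labs
-
-# Look up the SKU size of the workspace's capacity, then show the
-# Direct Lake guardrails that apply to that SKU.
-sku = labs.get_sku_size(workspace = 'MyWorkspace')  # illustrative workspace name
-labs.get_directlake_guardrails_for_sku(sku_size = sku)
-```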
- ---- -## get_lakehouse_columns -#### Shows the tables and columns of a lakehouse and their respective properties. -```python -import semantic-link-labs as labs -labs.get_lakehouse_columns( - lakehouse = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A pandas dataframe showing the tables/columns within a lakehouse and their properties. - ---- -## get_lakehouse_tables -#### Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. -```python -import semantic-link-labs as labs -labs.get_lakehouse_tables( - lakehouse = 'MyLakehouse', - workspace = 'NewWorkspace', - extended = True, - count_rows = True) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -> -> **extended** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds the following additional table properties \['Files', 'Row Groups', 'Table Size', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail'\]. Also indicates the SKU for the workspace and whether guardrails are hit. Default value: False. -> -> **count_rows** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Adds an additional column showing the row count of each table. Default value: False. -> -> **export** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; If specified as True, the resulting dataframe will be exported to a delta table in your lakehouse. -### Returns -> A pandas dataframe showing the delta tables within a lakehouse and their properties. - ---- -## get_measure_dependencies -#### Shows all dependencies for all measures in a semantic model -```python -import semantic-link-labs as labs -labs.get_measure_dependencies( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing all dependencies for all measures in the semantic model. - ---- -## get_model_calc_dependencies -#### Shows all dependencies for all objects in a semantic model -```python -import semantic-link-labs as labs -labs.get_model_calc_dependencies( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing all dependencies for all objects in the semantic model. - ---- -## get_object_level_security -#### Shows a list of columns used in object level security. 
-```python -import semantic-link-labs as labs -labs.get_object_level_security( - dataset = 'AdventureWorks', - workspace = '') -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The semantic model name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing the columns used in object level security within a semantic model. - ---- -## get_report_json -#### Gets the report.json file content of a Power BI report. -```python -import semantic-link-labs as labs -labs.get_report_json( - report = 'MyReport', - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.get_report_json( - report = 'MyReport', - #workspace = None, - save_to_file_name = 'MyFileName' - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the report resides. -> -> **save_to_file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Specifying this parameter will save the report.json file to your lakehouse with the file name of this parameter. -### Returns -> The report.json file for a given Power BI report. - ---- -## get_semantic_model_bim -#### Extracts the Model.bim file for a given semantic model. -```python -import semantic-link-labs as labs -labs.get_semantic_model_bim( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.get_semantic_model_bim( - dataset = 'AdventureWorks', - #workspace = None, - save_to_file_name = 'MyFileName' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **save_to_file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Specifying this parameter will save the model.bim file to your lakehouse with the file name of this parameter. -### Returns -> The model.bim file for a given semantic model. - ---- -## get_shared_expression -#### Dynamically generates the M expression used by a Direct Lake model for a given lakehouse. -```python -import semantic-link-labs as labs -labs.get_shared_expression( - lakehouse = '', - #workspace = '' - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse name. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A string showing the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint. - ---- -## get_sku_size -#### Shows the SKU size for a workspace. -```python -import semantic-link-labs as labs -labs.get_sku_size( - workspace = '' - ) -``` -### Parameters -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A string containing the SKU size for a workspace. - ---- -## import_vertipaq_analyzer -#### Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse. 
-```python -import semantic-link-labs as labs -labs.import_vertipaq_analyzer( - folder_path = '/lakehouse/default/Files/VertipaqAnalyzer', - file_name = 'Workspace Name-DatasetName.zip' - ) -``` -### Parameters -> **folder_path** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved. -> -> **file_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; File name of the file which contains the vertipaq analyzer info. - ---- -## launch_report -#### Shows a Power BI report within a Fabric notebook. -```python -import semantic-link-labs as labs -labs.launch_report( - report = 'MyReport', - #workspace = None - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the report. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the workspace in which the report resides. - ---- -## list_dashboards -#### Shows the dashboards within the workspace. -```python -import semantic-link-labs as labs -labs.list_dashboards( - #workspace = '' - ) -``` -### Parameters -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace name. -### Returns -> A pandas dataframe showing the dashboards which exist in the workspace. - ---- -## list_dataflow_storage_accounts -#### Shows the dataflow storage accounts. -```python -import semantic-link-labs as labs -labs.list_dataflow_storage_accounts() -``` -### Parameters -None -### Returns -> A pandas dataframe showing the accessible dataflow storage accounts. -> ---- -## list_direct_lake_model_calc_tables -#### Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.list_direct_lake_model_calc_tables( - dataset = 'AdventureWorks', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A pandas dataframe showing the calculated tables which were migrated to Direct Lake and whose DAX expressions are stored as model annotations. - ---- -## list_lakehouses -#### Shows the properties associated with lakehouses in a workspace. -```python -import semantic-link-labs as labs -labs.list_lakehouses( - workspace = None - ) -``` -### Parameters -> **workspaceName** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A pandas dataframe showing the properties of a all lakehouses in a workspace. - ---- -## list_semantic_model_objects -#### Shows a list of semantic model objects. -```python -import semantic-link-labs as labs -labs.list_semantic_model_objects( - dataset = 'AdvWorks', - workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. 
-### Returns -> A dataframe showing a list of objects in the semantic model - ---- -## list_shortcuts -#### Shows the shortcuts within a lakehouse (*note: the API behind this function is not yet available. The function will work as expected once the API is officially released*) -```python -import semantic-link-labs as labs -labs.list_shortcuts( - lakehouse = 'MyLakehouse', - #workspace = '' - ) -``` -### Parameters -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Name of the lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A pandas dataframe showing the shortcuts which exist in a given lakehouse and their properties. - ---- -## list_warehouses -#### Shows the warehouss within a workspace. -```python -import semantic-link-labs as labs -labs.list_warehouses( - #workspace = None - ) -``` -### Parameters -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace name. -### Returns -> A pandas dataframe showing the warehouses which exist in a given workspace and their properties. - ---- -## measure_dependency_tree -#### Shows a measure dependency tree of all dependent objects for a measure in a semantic model. -```python -import semantic-link-labs as labs -labs.measure_dependency_tree( - dataset = 'AdventureWorks', - measure_name = 'Sales Amount', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **measure_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the measure to use for building a dependency tree. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A tree view showing the dependencies for a given measure within the semantic model. - ---- -## migrate_calc_tables_to_lakehouse -#### Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_calc_tables_to_lakehouse( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. 
-> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_calc_tables_to_semantic_model -#### Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_calc_tables_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_field_parameters -#### Migrates field parameters from one semantic model to another. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_field_parameters( - dataset = 'AdventureWorks', - new_dataset = '', - #workspace = '', - #new_dataset_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_model_objects_to_semantic_model -#### Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_model_objects_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = '', - #workspace = '', - #new_dataset_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. 
-> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migrate_tables_columns_to_semantic_model -#### Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. -> [!NOTE] -> This function is specifically relevant for import/DirectQuery migration to Direct Lake -```python -import semantic-link-labs as labs -labs.migrate_tables_columns_to_semantic_model( - dataset = 'AdventureWorks', - new_dataset = 'AdventureWorksDL', - #workspace = '', - #new_dataset_workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The lakehouse to be used by the Direct Lake semantic model. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## migration_validation -#### Shows the objects in the original semantic model and whether then were migrated successfully or not. -```python -import semantic-link-labs as labs -labs.migration_validation( - dataset = 'AdvWorks', - new_dataset = 'AdvWorksDL', - workspace = None, - new_dataset_workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the import/DirectQuery semantic model. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the Direct Lake semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace to be used by the Direct Lake semantic model. -### Returns -> A dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. - ---- -## model_bpa_rules -#### Shows the default Best Practice Rules for the semantic model used by the [run_model_bpa](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#run_model_bpa) function -```python -import semantic-link-labs as labs -labs.model_bpa_rules() -``` -### Returns -> A pandas dataframe showing the default semantic model best practice rules. 
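-
-*A minimal sketch of trimming the default rules before running the Best Practice Analyzer, assuming the rules dataframe exposes a 'Category' column (the column and category names are assumptions) and using the rules_dataframe parameter of run_model_bpa documented below. The library installs as 'semantic-link-labs' but imports as sempy_labs.*
-```python
-import sempy_labs as labs  # installed as 'semantic-link-labs'; the importable package is sempy_labs
-
-# Start from the default Best Practice Rules and keep a single category.
-rules = labs.model_bpa_rules()
-perf_rules = rules[rules['Category'] == 'Performance']  # 'Category'/'Performance' are assumed values
-
-# Run the Best Practice Analyzer against only the trimmed rule set.
-labs.run_model_bpa(
-    dataset = 'AdventureWorks',
-    rules_dataframe = perf_rules
-    )
-```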
- ---- -## optimize_lakehouse_tables -#### Runs the [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) function over the specified lakehouse tables. -```python -import semantic-link-labs as labs -labs.optimize_lakehouse_tables( - tables = ['Sales', 'Calendar'], - #lakehouse = None, - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.optimize_lakehouse_tables( - tables = None, - #lakehouse = 'MyLakehouse', - #workspace = None - ) -``` -### Parameters -> **tables** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name(s) of the lakehouse delta table(s) to optimize. If 'None' is entered, all of the delta tables in the lakehouse will be queued to be optimized. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Name of the lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## refresh_calc_tables -#### Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.refresh_calc_tables( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## refresh_semantic_model -#### Performs a refresh on a semantic model. -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - refresh_type = 'full', - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - tables = ['Sales', 'Geography'], - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - partitions = ["'Sales'[Sales - 2024]", "'Sales'[Sales - 2023]"], - workspace = None -) -``` -```python -import semantic-link-labs as labs -labs.refresh_semantic_model( - dataset = 'AdventureWorks', - tables = ['Geography'], - partitions = ["'Sales'[Sales - 2024]", "'Sales'[Sales - 2023]"], - workspace = None -) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. If no tables/partitions are specified, the entire semantic model is refreshed. -> -> **tables** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Tables to refresh. -> -> **partitions** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Partitions to refresh. Must be in "'Table'[Partition]" format. 
-> -> **refresh_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Type of processing to perform. Options: ('full', 'automatic', 'dataOnly', 'calculate', 'clearValues', 'defragment'). Default value: 'full'. -> -> **retry_count** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; Number of retry attempts. Default is 0. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## report_rebind -#### Rebinds a report to a semantic model. -```python -import semantic-link-labs as labs -labs.report_rebind( - report = '', - dataset = '', - #report_workspace = '', - #dataset_workspace = '' - ) -``` -### Parameters -> **report** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the report. -> -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to rebind to the report. -> -> **report_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the report resides. -> -> **dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## report_rebind_all -#### Rebinds all reports in a workspace which are bound to a specific semantic model to a new semantic model. -```python -import semantic-link-labs as labs -labs.report_rebind_all( - dataset = '', - new_dataset = '', - #dataset_workspace = '' , - #new_dataset_workspace = '' , - #report_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model currently binded to the reports. -> -> **new_dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model to rebind to the reports. -> -> **dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the original semantic model resides. -> -> **new_dataset_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the new semantic model resides. -> -> **report_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the reports reside. -### Returns -> A printout stating the success/failure of the operation. - ---- -## resolve_lakehouse_name -#### Returns the name of the lakehouse for a given lakehouse Id. -```python -import semantic-link-labs as labs -labs.resolve_lakehouse_name( - lakehouse_id = '', - #workspace = '' - ) -``` -### Parameters -> **lakehouse_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID) -> ->> Required; UUID object representing a lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A string containing the lakehouse name. - ---- -## resolve_lakehouse_id -#### Returns the ID of a given lakehouse. 
-```python
-import semantic-link-labs as labs
-labs.resolve_lakehouse_id(
-    lakehouse = 'MyLakehouse',
-    #workspace = ''
-    )
-```
-### Parameters
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the lakehouse.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A string containing the lakehouse ID.
-
----
-## resolve_dataset_id
-#### Returns the ID of a given semantic model.
-```python
-import semantic-link-labs as labs
-labs.resolve_dataset_id(
-    dataset = 'MyReport',
-    #workspace = ''
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A string containing the semantic model ID.
-
----
-## resolve_dataset_name
-#### Returns the name of a given semantic model ID.
-```python
-import semantic-link-labs as labs
-labs.resolve_dataset_name(
-    dataset_id = '',
-    #workspace = ''
-    )
-```
-### Parameters
-> **dataset_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID)
->
->> Required; UUID object representing a semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A string containing the semantic model name.
-
----
-## resolve_report_id
-#### Returns the ID of a given report.
-```python
-import semantic-link-labs as labs
-labs.resolve_report_id(
-    report = 'MyReport',
-    #workspace = ''
-    )
-```
-### Parameters
-> **report** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the report.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the report resides.
-### Returns
-> A string containing the report ID.
-
----
-## resolve_report_name
-#### Returns the name of a given report ID.
-```python
-import semantic-link-labs as labs
-labs.resolve_report_name(
-    report_id = '',
-    #workspace = ''
-    )
-```
-### Parameters
-> **report_id** [UUID](https://docs.python.org/3/library/uuid.html#uuid.UUID)
->
->> Required; UUID object representing a report.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the report resides.
-### Returns
-> A string containing the report name.
-
----
-## run_dax
-#### Runs a DAX query against a semantic model.
-```python
-import semantic-link-labs as labs
-labs.run_dax(
-    dataset = 'AdventureWorks',
-    dax_query = 'EVALUATE ROW("Sales Amount", [Sales Amount])',
-    #user_name = None,
-    #workspace = None
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **dax_query** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The DAX query to be executed.
->
-> **user_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The effective user name against which the DAX query is executed.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
-### Returns
-> A pandas dataframe with the results of the DAX query.
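-
-*A minimal sketch of persisting the result of run_dax with save_as_delta_table (documented below); the DAX query and delta table name are illustrative. The library installs as 'semantic-link-labs' but imports as sempy_labs.*
-```python
-import sempy_labs as labs  # installed as 'semantic-link-labs'; the importable package is sempy_labs
-
-# Run a simple DAX query and land the resulting pandas dataframe in the lakehouse.
-df = labs.run_dax(
-    dataset = 'AdventureWorks',
-    dax_query = 'EVALUATE ROW("Sales Amount", [Sales Amount])'  # illustrative query
-    )
-labs.save_as_delta_table(
-    dataframe = df,
-    delta_table_name = 'dax_query_results',  # illustrative table name
-    write_mode = 'overwrite'
-    )
-```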
- ---- -## run_model_bpa -#### Runs the Best Practice Rules against a semantic model. -```python -import semantic-link-labs as labs -labs.run_model_bpa( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **rules_dataframe** -> ->> Optional; A pandas dataframe including rules to be analyzed. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **return_dataframe** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Returns a pandas dataframe instead of the visualization. -> -> **export** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Exports the results to a delta table in the lakehouse. -### Returns -> A visualization showing objects which violate each [Best Practice Rule](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) by rule category. - ---- -## save_as_delta_table -#### Saves a dataframe as a delta table in the lakehouse -```python -import semantic-link-labs as labs -labs.save_as_delta_table( - dataframe = df, - delta_table_name = 'MyNewTable', - write_mode = 'overwrite', - lakehouse = None, - workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.save_as_delta_table( - dataframe = df, - delta_table_name = 'MyNewTable', - write_mode = 'append', - lakehouse = None, - workspace = None - ) -``` -### Parameters -> **dataframe** [DataFrame](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) -> ->> Required; The dataframe to save as a delta table. -> -> **delta_table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the delta table to save the dataframe. -> -> **write_mode** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Options: 'append' or 'overwrite'. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional: The name of the lakehouse in which the delta table will be saved. Defaults to the default lakehouse attached to the notebook. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. Defaults to the workspace in which the notebook resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## show_unsupported_direct_lake_objects -#### Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). -```python -import semantic-link-labs as labs -labs.show_unsupported_direct_lake_objects( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> 3 [pandas dataframes](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html) showing objects (tables/columns/relationships) within the semantic model which are currently not supported by Direct Lake mode. 
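-
-*A minimal sketch of inspecting the three dataframes returned by show_unsupported_direct_lake_objects, assuming they are returned as a tuple in table/column/relationship order (the exact return shape is an assumption). The library installs as 'semantic-link-labs' but imports as sempy_labs.*
-```python
-import sempy_labs as labs  # installed as 'semantic-link-labs'; the importable package is sempy_labs
-
-# Unpack the unsupported tables, columns and relationships for review.
-dfT, dfC, dfR = labs.show_unsupported_direct_lake_objects(dataset = 'AdventureWorks')
-print(f"Unsupported objects: {len(dfT)} tables, {len(dfC)} columns, {len(dfR)} relationships")
-```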
- ---- -## translate_semantic_model -#### Translates names, descriptions, display folders for all objects in a semantic model. -```python -import semantic-link-labs as labs -labs.translate_semantic_model( - dataset = 'AdventureWorks', - languages = ['it_IT', 'fr-FR'], - #workspace = None - ) -``` -```python -import semantic-link-labs as labs -labs.translate_semantic_model( - dataset = 'AdventureWorks', - languages = ['it_IT', 'fr-FR'], - exclude_characters = '_-', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **languages** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; [Language code(s)](https://learn.microsoft.com/azure/ai-services/translator/language-support) to translate. -> -> **exclude_characters** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Any character in this string will be replaced by a space when given to the AI translator. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## update_direct_lake_model_lakehouse_connection -#### Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.update_direct_lake_model_lakehouse_connection( - dataset = '', - #lakehouse = '', - #workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Name of the lakehouse. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the lakehouse resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## update_direct_lake_partition_entity -#### Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.update_direct_lake_partition_entity( - dataset = 'AdventureWorks', - table_name = 'Internet Sales', - entity_name = 'FACT_InternetSales', - #workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -```python -import semantic-link-labs as labs -labs.update_direct_lake_partition_entity( - dataset = 'AdventureWorks', - table_name = ['Internet Sales', 'Geography'], - entity_name = ['FACT_InternetSales', 'DimGeography'], - #workspace = '', - #lakehouse = '', - #lakehouse_workspace = '' - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. 
->
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the table in the semantic model.
->
-> **entity_name** [str](https://docs.python.org/3/library/stdtypes.html#str) or [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the lakehouse table to be mapped to the semantic model table.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **lakehouse** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Name of the lakehouse.
->
-> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the lakehouse resides.
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## update_item
-#### Updates the name and/or description of a Fabric item.
-```python
-import semantic-link-labs as labs
-labs.update_item(
-    item_type = 'Lakehouse',
-    current_name = 'MyLakehouse',
-    new_name = 'MyNewLakehouse',
-    #description = 'This is my new lakehouse',
-    #workspace = None
-    )
-```
-### Parameters
-> **item_type** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Type of item to update. Valid options: 'DataPipeline', 'Eventstream', 'KQLDatabase', 'KQLQueryset', 'Lakehouse', 'MLExperiment', 'MLModel', 'Notebook', 'Warehouse'.
->
-> **current_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Current name of the item.
->
-> **new_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; New name of the item.
->
-> **description** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; New description of the item.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the item resides.
-### Returns
-> A printout stating the success/failure of the operation.
-
----
-## vertipaq_analyzer
-#### Extracts the Vertipaq Analyzer statistics from a semantic model.
-```python
-import semantic-link-labs as labs
-labs.vertipaq_analyzer(
-    dataset = 'AdventureWorks',
-    #workspace = '',
-    export = None
-    )
-```
-
-```python
-import semantic-link-labs as labs
-labs.vertipaq_analyzer(
-    dataset = 'AdventureWorks',
-    #workspace = '',
-    export = 'zip'
-    )
-```
-
-```python
-import semantic-link-labs as labs
-labs.vertipaq_analyzer(
-    dataset = 'AdventureWorks',
-    #workspace = '',
-    export = 'table'
-    )
-```
-### Parameters
-> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; Name of the semantic model.
->
-> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The workspace where the semantic model resides.
->
-> **export** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the [import_vertipaq_analyzer](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#import_vertipaq_analyzer) function). Specifying 'table' will export the results to delta tables (appended) in your lakehouse. Default value: None.
-> -> **lakehouse_workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace in which the lakehouse used by a Direct Lake semantic model resides. -> -> **read_stats_from_data** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse). -### Returns -> A visualization of the Vertipaq Analyzer statistics. - ---- -## warm_direct_lake_cache_perspective -#### Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.warm_direct_lake_cache_perspective( - dataset = 'AdventureWorks', - perspective = 'WarmCache', - add_dependencies = True, - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **perspective** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the perspective which contains objects to be used for warming the cache. -> -> **add_dependencies** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Includes object dependencies in the cache warming process. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. - ---- -## warm_direct_lake_cache_isresident -#### Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory. -> [!NOTE] -> This function is only relevant to semantic models in Direct Lake mode. -```python -import semantic-link-labs as labs -labs.warm_direct_lake_cache_isresident( - dataset = 'AdventureWorks', - #workspace = None - ) -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; Name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The workspace where the semantic model resides. -### Returns -> A printout stating the success/failure of the operation. ---- - -# semantic-link-labs.TOM Functions - -## connect_semantic_model -#### Forms the connection to the Tabular Object Model (TOM) for a semantic model -```python -with connect_semantic_model(dataset ='AdventureWorks', workspace = None, readonly = True) as tom: -``` -```python -with connect_semantic_model(dataset ='AdventureWorks', workspace = None, readonly = False) as tom: -``` -### Parameters -> **dataset** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the semantic model. -> -> **workspace** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The name of the workspace in which the semantic model resides. Defaults to the workspace in which the notebook resides. -> -> **readonly** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting this to true uses a read only mode of TOM. Setting this to false enables read/write and saves any changes made to the semantic model. Default value: True. - -## add_calculated_column -#### Adds a calculated column to a table within a semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_column( - table_name = 'Segment', - column_name = 'Business Segment', - expression = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table where the column will be added. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculated column. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression for the calculated column. -> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the calculated column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The formats strinf for the column. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_calculated_table -#### Adds a calculated table to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_table( - name = 'Segment', - expression = '' - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression for the table. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the table. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the table. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the table to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_calculated_table_column -#### Adds a calculated table column to a calculated table within a semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculated_table_column( - table_name = 'Segment', - column_name = 'Business Segment', - source_column = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column will reside. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **source_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The source column for the column. -> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The format string of the column. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_calculation_group -#### Adds a calculation group to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculation_group( - name = 'Segment', - precedence = 1 - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculation group. -> -> **precedence** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the calculation group. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the calculation group to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_calculation_item -#### Adds a calculation item to a calculation group within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_calculation_item( - table_name = 'Segment', - calculation_item_name = 'YTD' - expression = "CALCULATE(SELECTEDMEASURE(), DATESYTD('Date'[Date]))" - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. 
-> -> **calculation_item_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the calculation item. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The DAX expression encapsulating the logic of the calculation item. -> -> **ordinal** [int](https://docs.python.org/3/library/stdtypes.html#int) -> ->> Optional; -> -> **format_string_expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the calculation item. -> -### Returns -> - ---- -## add_data_column -#### Adds a data column to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_data_column( - table_name = 'Segment', - column_name = 'Business Segment', - source_column = '', - data_type = 'String' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the column will exist. -> -> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column. -> -> **source_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column in the source. -> -> **data_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The data type of the column. -> -> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The format string of the column. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the column to be hidden if True. Default value: False. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the column. -> -> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The display folder for the column. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the column. -> -> **key** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Marks the column as the primary key of the table. Default value: False. -> -> **summarize_by** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Sets the value for the Summarize By property of the column. -> -### Returns -> - ---- -## add_entity_partition -#### Adds an entity partition to a table in a semantic model. Entity partitions are used for tables within Direct Lake semantic models. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_entity_partition( - table_name = 'Sales', - entity_name = 'Fact_Sales' - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which to place the entity partition. -> -> **entity_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the lakehouse table. 
-> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The expression to use for the partition. This defaults to using the existing 'DatabaseQuery' expression within the Direct Lake semantic model. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the partition. -> -### Returns -> - ---- -## add_expression -#### Adds an expression to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_expression( - name = 'DatabaseQuery', - expression = 'let...' - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the expression. -> -> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The M-code encapsulating the logic for the expression. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the expression. -> -### Returns -> - ---- -## add_field_parameter -#### Adds a field parameter to a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_field_parameter( - table_name = 'Segment', - objects = ["'Product'[Product Category]", "[Sales Amount]", "'Geography'[Country]"] - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the field parameter. -> -> **objects** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; A list of columns/tables to place in the field parameter. Columns must be fully qualified (i.e. "'Table Name'[Column Name]" and measures must be unqualified (i.e. "[Measure Name]"). -> -### Returns -> - ---- -## add_hierarchy -#### Adds a hierarchy to a table within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_hierarchy( - table_name = 'Geography', - hierarchy_name = 'Geo Hierarchy', - columns = ['Continent', 'Country', 'City'] - ) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table in which the hierarchy will reside. -> -> **hierarchy_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the hierarchy. -> -> **columns** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; A list of columns to use in the hierarchy. Must be ordered from the top of the hierarchy down (i.e. ["Continent", "Country", "City"]). -> -> **levels** [list](https://docs.python.org/3/library/stdtypes.html#list) of [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; A list of levels to use in the hierarchy. These will be the displayed name (instead of the column names). If omitted, the levels will default to showing the column names. 
->
-> **hierarchy_description** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The description of the hierarchy.
->
-> **hierarchy_hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool)
->
->> Optional; Sets the hierarchy to be hidden if True. Default value: False.
->
-### Returns
->
-
----
-## add_m_partition
-#### Adds an M-partition to a table within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.add_m_partition(
-        table_name = 'Segment',
-        partition_name = 'Segment',
-        expression = 'let...',
-        mode = 'Import'
-    )
-```
-### Parameters
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table in which the partition will reside.
->
-> **partition_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the M partition.
->
-> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The M-code encapsulating the logic of the partition.
->
-> **mode** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The storage mode for the partition. Default value: 'Import'.
->
-> **description** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The description of the partition.
->
-### Returns
->
-
----
-## add_measure
-#### Adds a measure to the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.add_measure(
-        table_name = 'Sales',
-        measure_name = 'Sales Amount',
-        expression = "SUM('Sales'[SalesAmount])",
-        format_string = '$,00'
-    )
-```
-### Parameters
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table in which the measure will reside.
->
-> **measure_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the measure.
->
-> **expression** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The DAX expression encapsulating the logic of the measure.
->
-> **format_string** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The format string of the measure.
->
-> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool)
->
->> Optional; Sets the measure to be hidden if True. Default value: False.
->
-> **description** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The description of the measure.
->
-> **display_folder** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Optional; The display folder for the measure.
->
-### Returns
->
-
----
-## add_perspective
-#### Adds a perspective to the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.add_perspective(
-        perspective_name = 'Marketing'
-    )
-```
-### Parameters
-> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the perspective.
->
-### Returns
->
-
----
-## add_relationship
-#### Adds a relationship to the semantic model.
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_relationship( - from_table = 'Sales', - from_column = 'ProductKey', - to_table = 'Product', - to_column = 'ProductKey', - from_cardinality = 'Many', - to_cardinality = 'One', - is_active = True - ) -``` -### Parameters -> **from_table** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table on the 'from' side of the relationship. -> -> **from_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column on the 'from' side of the relationship. -> -> **to_table** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table on the 'to' side of the relationship. -> -> **to_column** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the column on the 'to' side of the relationship. -> -> **from_cardinality** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. -> -> **to_cardinality** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. -> -> **cross_filtering_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). Default value: 'Automatic'. -> -> **is_active** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Setting for whether the relationship is active or not. Default value: True. -> -> **security_filtering_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). Default value: 'OneDirection'. -> -> **rely_on_referential_integrity** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; ; Setting for the rely on referential integrity of the relationship. Default value: False. -> -### Returns -> - ---- -## add_role -#### Adds a role to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_role( - role_name = 'Reader' - ) -``` -### Parameters -> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the role. -> -> **model_permission** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The [model permission](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.modelpermission?view=analysisservices-dotnet) of the role. Default value: 'Reader'. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The description of the role. -> -### Returns -> - ---- -## add_table -#### Adds a table to the semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_table( - name = 'Sales', - description = 'This is the sales table.', - hidden = False - ) -``` -### Parameters -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -> **description** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The descrition of the table. -> -> **data_category** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Optional; The data category of the table. -> -> **hidden** [bool](https://docs.python.org/3/library/stdtypes.html#bool) -> ->> Optional; Sets the table to be hidden if True. Default value: False. -> -### Returns -> - ---- -## add_to_perspective -#### Adds an object to a perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_to_perspective( - object = tom.model.Tables['Sales'].Measures['Sales Amount'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## add_translation -#### Adds a translation language to the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.add_translation( - language = 'it-IT' - ) -``` -### Parameters -> **language** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support) to add to the semantic model. -> -### Returns -> - ---- -## all_calculation_items -#### Outputs a list of all calculation items within all calculation groups in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for c in tom.all_calculation_items(): - print(c.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_columns -#### Outputs a list of all columns within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for c in tom.all_columns(): - print(c.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_hierarchies -#### Outputs a list of all hierarchies within all tables in the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - for h in tom.all_hierarchies(): - print(h.Name) -``` -### Parameters -None -### Returns -> - ---- -## all_levels -#### Outputs a list of all levels within all hierarchies within all tables in the semantic model. 
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    for l in tom.all_levels():
-        print(l.Name)
-```
-### Parameters
-None
-### Returns
->
-
----
-## all_measures
-#### Outputs a list of all measures within all tables in the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    for m in tom.all_measures():
-        print(m.Name)
-```
-### Parameters
-None
-### Returns
->
-
----
-## all_partitions
-#### Outputs a list of all partitions within all tables in the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    for p in tom.all_partitions():
-        print(p.Name)
-```
-### Parameters
-None
-### Returns
->
-
----
-## all_rls
-#### Outputs a list of all row level security objects within all roles of the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    for r in tom.all_rls():
-        print(r.Name)
-```
-### Parameters
-None
-### Returns
->
-
----
-## cardinality
-#### Obtains the cardinality of a column within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.cardinality(column = tom.model.Tables['Product'].Columns['Color'])
-```
-### Parameters
-> **column**
->
->> Required; The TOM column object.
->
-### Returns
->
-
----
-## clear_annotations
-#### Removes all annotations on a given object within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.clear_annotations(object = tom.model.Tables['Product'].Columns['Color'])
-```
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.clear_annotations(object = tom.model.Tables['Product'])
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-### Returns
->
-
----
-## clear_extended_properties
-#### Removes all extended properties on a given object within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.clear_extended_properties(object = tom.model.Tables['Product'].Columns['Color'])
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-### Returns
->
-
----
-## data_size
-#### Obtains the data size of a column within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.data_size(column = tom.model.Tables['Product'].Columns['Color'])
-```
-### Parameters
-> **column**
->
->> Required; The TOM column object.
->
-### Returns
->
-
----
-## depends_on
-#### Shows the objects on which a given object depends, based on the model's calculation dependencies.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-
-    dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None)
-    tom.depends_on(
-        object = tom.model.Tables['Product'].Columns['Color'],
-        dependencies = dep
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-> **dependencies**
->
->> Required; A dataframe showing the model's calculation dependencies.
->
-### Returns
->
-
----
-## dictionary_size
-#### Obtains the dictionary size of a column within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.dictionary_size(column = tom.model.Tables['Product'].Columns['Color'])
-```
-### Parameters
-> **column**
->
->> Required; The TOM column object.
->
-### Returns
->
-
----
-## fully_qualified_measures
-#### Shows all fully-qualified measures referenced by a given measure's DAX expression.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-
-    dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None)
-    tom.fully_qualified_measures(
-        object = tom.model.Tables['Product'].Columns['Color'],
-        dependencies = dep
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-> **dependencies**
->
->> Required; A dataframe showing the model's calculation dependencies.
->
-### Returns
->
-
----
-## get_annotation_value
-#### Obtains the annotation value for a given object's annotation in a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.get_annotation_value(
-        object = tom.model.Tables['Product'].Columns['Color'],
-        name = 'MyAnnotation'
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-> **name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the annotation.
->
-### Returns
->
-
----
-## get_annotations
-#### Obtains all of the annotations for a given object in a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.get_annotations(
-        object = tom.model.Tables['Product'].Columns['Color']
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-### Returns
->
-
----
-## get_extended_properties
-#### Obtains all of the extended properties for a given object in a semantic model.
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_extended_properties( - object = tom.model.Tables['Product'].Columns['Color'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## get_extended_property_value -#### Obtains the extended property value for an object's extended property. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.get_extended_property_value( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyExtendedProperty' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. -> -### Returns -> - ---- -## in_perspective -#### Identifies whether an object is in a given perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.in_perspective( - object = tom.model.Tables['Product'].Columns['Color'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## is_direct_lake -#### Identifies whether a semantic model is in Direct Lake mode. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - print(tom.is_direct_lake()) -``` -### Parameters -None -### Returns -> True/False - ---- -## is_field_parameter -#### Identifies whether a table is a field parameter. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - print(tom.is_field_parameter( - table_name = 'Parameter' - )) -``` -### Parameters -> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the table. -> -### Returns -> True/False - ---- -## records_per_segment -#### Obtains the records per segment of a partition within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.records_per_segment( - object = tom.model.Tables['Sales'].Partitions['Sales - 2024'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## referenced_by -#### Shows the objects referenced by a given object in a semantic model. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.referenced_by( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## remove_annotation -#### Removes the annotation from an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_annotation( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyAnnotation' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the annotation. -> -### Returns -> - ---- -## remove_extended_property -#### Removes the extended property from an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_extended_property( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyExtendedProperty' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. -> -### Returns -> - ---- -## remove_from_perspective -#### Removes an object (table, column, measure or hierarchy) from a perspective. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_from_perspective( - object = tom.model.Tables['Product'].Columns['Color'], - perspective_name = 'Marketing' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **perspective_name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the perspective. -> -### Returns -> - ---- -## remove_object -#### Removes an object from a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_object( - object = tom.model.Tables['Product'].Columns['Color'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## remove_translation -#### Removes a translation for an object in a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_translation( - object = tom.model.Tables['Product'].Columns['Color'], - language = 'it-IT' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. 
-> -> **language** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support). -> -### Returns -> - ---- -## remove_vertipaq_annotations -#### Removes the annotations set using the [set_vertipaq_annotations] function. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.remove_vertipaq_annotations() -``` -### Parameters -None -### Returns -> - ---- -## row_count -#### Obtains the row count of a table or partition within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.row_count( - object = tom.model.Tables['Product'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## set_annotation -#### Sets an annotation on an object within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_annotation( - object = tom.model.Tables['Product'].Columns['Color'], - name = 'MyAnnotation', - value = '1' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The annotation name. -> -> **value** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The annotation value. -> -### Returns -> - ---- -## set_direct_lake_behavior -#### Sets the [DirectLakeBehavior](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.directlakebehavior?view=analysisservices-dotnet) property for a Direct Lake semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_direct_lake_behavior( - direct_lake_behavior = 'DirectLakeOnly' - ) -``` -### Parameters -> **direct_lake_behavior** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The [DirectLakeBehavior](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.directlakebehavior?view=analysisservices-dotnet) value. -> -### Returns -> - ---- -## set_extended_property -#### Sets an extended property on an object within the semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom: - tom.set_extended_property( - object = tom.model.Tables['Product'].Columns['Color'], - type = 'Json', - name = 'MyExtendedProperty', - value = '{...}' - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **extended_property_type** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The type of extended property to set. Options: ['Json', 'String']. -> -> **name** [str](https://docs.python.org/3/library/stdtypes.html#str) -> ->> Required; The name of the extended property. 
->
-> **value** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The value of the extended property.
->
-### Returns
->
-
----
-## set_is_available_in_mdx
-#### Sets the [IsAvailableInMDX](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.isavailableinmdx?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-isavailableinmdx) property value for a column in a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_is_available_in_mdx(
-        table_name = 'Sales',
-        column_name = 'SalesAmount',
-        value = False
-    )
-```
-### Parameters
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table in which the column resides.
->
-> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the column.
->
-> **value** [bool](https://docs.python.org/3/library/stdtypes.html#bool)
->
->> Required; The value to set for the [IsAvailableInMDX](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.isavailableinmdx?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-isavailableinmdx) property.
->
-### Returns
->
-
----
-## set_ols
-#### Sets object level security for a given role/column within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_ols(
-        role_name = 'Reader',
-        table_name = 'Geography',
-        column_name = 'Country',
-        permission = 'None'
-    )
-```
-### Parameters
-> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the role.
->
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table.
->
-> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the column.
->
-> **permission** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The permission for a given column. Options: ['Read', 'None', 'Default'].
->
-### Returns
->
-
----
-## set_rls
-#### Sets the row level security expression for a given role/table within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_rls(
-        role_name = 'Reader',
-        table_name = 'UserGeography',
-        filter_expression = "'UserGeography'[UserEmail] = USERPRINCIPALNAME()"
-    )
-```
-### Parameters
-> **role_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the role.
->
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table to place row level security.
->
-> **filter_expression** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The DAX expression containing the row level security logic.
->
-### Returns
->
-
----
-## set_summarize_by
-#### Sets the [Summarize By](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby) property on a column in a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_summarize_by(
-        table_name = 'Geography',
-        column_name = 'Country',
-        value = 'None'
-    )
-```
-### Parameters
-> **table_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the table in which the column resides.
->
-> **column_name** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The name of the column.
->
-> **value** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The [summarize by](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby) property of the column.
->
-### Returns
->
-
----
-## set_translation
-#### Sets the translation value for an object in a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_translation(
-        object = tom.model.Tables['Geography'],
-        language = 'it-IT',
-        property = 'Name',
-        value = 'Geografia'
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-> **language** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The [language code](https://learn.microsoft.com/azure/ai-services/translator/language-support) in which to translate the object property.
->
-> **property** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The property to translate. One of the following values: ['Name', 'Description', 'Display Folder'].
->
-> **value** [str](https://docs.python.org/3/library/stdtypes.html#str)
->
->> Required; The translation value.
->
-### Returns
->
-
----
-## set_vertipaq_annotations
-#### Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = False) as tom:
-    tom.set_vertipaq_annotations()
-```
-### Parameters
-None
-### Returns
->
-
----
-## total_size
-#### Obtains the total size (in bytes) of a table or column within a semantic model.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-    tom.total_size(
-        object = tom.model.Tables['Sales'].Columns['SalesAmount']
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
->
-### Returns
-> The total size (in bytes) of the object.
-
----
-## unqualified_columns
-#### Shows all unqualified columns referenced by a given measure's DAX expression.
-```python
-import semantic-link-labs as labs
-from semantic-link-labs.TOM import connect_semantic_model
-
-with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom:
-
-    dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None)
-    tom.unqualified_columns(
-        object = tom.model.Tables['Product'].Columns['Color'],
-        dependencies = dep
-    )
-```
-### Parameters
-> **object**
->
->> Required; The TOM object.
-> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_calc_item -#### Identifies the calculation items which reference a given object. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.used_in_calc_item( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_hierarchies -#### Identifies the hierarchies which reference a given column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_hierarchies( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_in_levels -#### Identifies the levels which reference a given column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_levels( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_in_relationships -#### Identifies the relationships which use a given table/column. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_relationships( - object = tom.model.Tables['Geography'].Columns['GeographyID'] - ) -``` -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_relationships( - object = tom.model.Tables['Geography'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - ---- -## used_in_rls -#### Identifies the filter expressions which reference a given object. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - - dep = labs.get_model_calc_dependencies(dataset = 'AdventureWorks', workspace = None) - tom.used_in_rls( - object = tom.model.Tables['Product'].Columns['Color'], - dependencies = dep - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -> **dependencies** -> ->> Required; A dataframe showing the model's calculation dependencies. -> -### Returns -> - ---- -## used_in_sort_by -#### Identifies the column used for sorting a given column. 
-```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_in_sort_by( - column = tom.model.Tables['Geography'].Columns['City'] - ) -``` -### Parameters -> **column** -> ->> Required; The TOM column object. -> -### Returns -> - ---- -## used_size -#### Obtains the used of a hierarchy or relationship within a semantic model. -```python -import semantic-link-labs as labs -from semantic-link-labs.TOM import connect_semantic_model - -with connect_semantic_model(dataset = 'AdventureWorks', workspace = None, readonly = True) as tom: - tom.used_size( - object = tom.model.Tables['Geography'].Hierarchies['Geo Hierarchy'] - ) -``` -### Parameters -> **object** -> ->> Required; The TOM object. -> -### Returns -> - - - - --- ## Direct Lake migration diff --git a/docs/requirements.txt b/docs/requirements.txt index ad8b4fa4..75a5603a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1,10 @@ -semantic-link-sempy \ No newline at end of file +semantic-link-sempy +sphinx_rtd_theme +pandas==2.0.3 +# pyspark==3.5.0 +azure-identity==1.7.1 +azure-keyvault-secrets +azure-storage-file-datalake==12.3.1 +azure-storage-blob>=12.9.0 +anytree +IPython \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index beb4d783..8001986a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,6 +1,6 @@ import os import sys -sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath('../../src/')) # Configuration file for the Sphinx documentation builder. # @@ -13,18 +13,27 @@ project = 'semantic-link-labs' copyright = '2024, Microsoft and community' author = 'Microsoft and community' -release = '0.4.0' +release = '0.4.1' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon'] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.napoleon', + "sphinx.ext.intersphinx", +] + +intersphinx_mapping = { + 'python': ('http://docs.python.org/', None), + 'numpy': ('https://numpy.org/doc/stable/', None), + 'pandas': ('http://pandas.pydata.org/pandas-docs/dev', None) +} templates_path = ['_templates'] exclude_patterns = [] - # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output @@ -32,6 +41,6 @@ html_static_path = ['_static'] # List of packages we don't want to install in the environment -autodoc_mock_imports = ['delta', 'synapse'] +autodoc_mock_imports = ['delta', 'synapse', 'jwt', 'semantic-link-sempy', 'pyspark', 'powerbiclient'] napoleon_numpy_docstring = True \ No newline at end of file diff --git a/notebooks/Migration to Direct Lake.ipynb b/notebooks/Migration to Direct Lake.ipynb index 0eff22da..7a32e723 100644 --- a/notebooks/Migration to Direct Lake.ipynb +++ b/notebooks/Migration to Direct Lake.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release 
notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"969a29bf","metadata":{},"source":["### Import the library and set initial parameters"]},{"cell_type":"code","execution_count":null,"id":"29c923f8","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","\n","dataset_name = '' #Enter the import/DQ semantic model name\n","workspace_name = None #Enter the workspace of the import/DQ semantic model. It set to none it will use the current workspace.\n","new_dataset_name = '' #Enter the new Direct Lake semantic model name\n","new_dataset_workspace_name = None #Enter the workspace where the Direct Lake model will be created. If set to None it will use the current workspace.\n","lakehouse_name = None #Enter the lakehouse to be used for the Direct Lake model. If set to None it will use the lakehouse attached to the notebook.\n","lakehouse_workspace_name = None #Enter the lakehouse workspace. If set to None it will use the new_dataset_workspace_name."]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Create the [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file\n","\n","This encapsulates all of the semantic model's Power Query logic into a single file."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.create_pqt_file(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"bf945d07-544c-4934-b7a6-cfdb90ca725e","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Import the Power Query Template to Dataflows Gen2\n","\n","- Open the [OneLake file explorer](https://www.microsoft.com/download/details.aspx?id=105222) and sync your files (right click -> Sync from OneLake)\n","\n","- Navigate to your lakehouse. From this window, create a new Dataflows Gen2 and import the Power Query Template file from OneLake (OneLake -> Workspace -> Lakehouse -> Files...), and publish the Dataflows Gen2.\n","\n","
\n","Important!: Make sure to create the Dataflows Gen2 from within the lakehouse window. That will ensure that all the tables automatically map to that lakehouse as the destination. Otherwise, you will have to manually map each table to its destination individually.\n","
"]},{"cell_type":"markdown","id":"9975db7d","metadata":{},"source":["### Create the Direct Lake model based on the import/DQ semantic model\n","\n","Calculated columns are not migrated to the Direct Lake model as they are not supported in Direct Lake mode."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import time\n","labs.create_blank_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","\n","time.sleep(2)\n","\n","labs.migrate_calc_tables_to_lakehouse(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_tables_columns_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_calc_tables_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","labs.migrate_model_objects_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","labs.migrate_field_parameters(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","time.sleep(2)\n","labs.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","labs.refresh_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","labs.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"bb98bb13","metadata":{},"source":["### Show migrated/unmigrated objects"]},{"cell_type":"code","execution_count":null,"id":"5db2f22c","metadata":{},"outputs":[],"source":["labs.migration_validation(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name, \n"," workspace = workspace_name, \n"," new_dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"fa244e9d-87c2-4a66-a7e0-be539a0ac7de","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Rebind all reports using the old semantic model to the new Direct Lake semantic model"]},{"cell_type":"code","execution_count":null,"id":"d4e867cc","metadata":{},"outputs":[],"source":["labs.report_rebind_all(\n"," dataset = dataset_name,\n"," dataset_workspace = workspace_name,\n"," new_dataset = new_dataset_name,\n"," new_dataset_workpace = new_dataset_workspace_name,\n"," report_workspace = workspace_name)"]},{"cell_type":"markdown","id":"3365d20d","metadata":{},"source":["### Rebind reports one-by-one (optional)"]},{"cell_type":"code","execution_count":null,"id":"056b7180-d7ac-492c-87e7-ac7d0e4bb929","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["report_name = '' # Enter report name which you want to rebind to 
the new Direct Lake model\n","\n","labs.report_rebind(\n"," report = report_name,\n"," dataset = new_dataset_name,\n"," report_workspace=workspace_name,\n"," dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"526f2327","metadata":{},"source":["### Show unsupported objects"]},{"cell_type":"code","execution_count":null,"id":"a47376d7","metadata":{},"outputs":[],"source":["dfT, dfC, dfR = labs.show_unsupported_direct_lake_objects(dataset = dataset_name, workspace = workspace_name)\n","\n","print('Calculated Tables are not supported...')\n","display(dfT)\n","print(\"Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations\")\n","print('Calculated columns are not supported. Columns of binary data type are not supported.')\n","display(dfC)\n","print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.')\n","display(dfR)"]},{"cell_type":"markdown","id":"ed08ba4c","metadata":{},"source":["### Schema check between semantic model tables/columns and lakehouse tables/columns\n","\n","This will list any tables/columns which are in the new semantic model but do not exist in the lakehouse"]},{"cell_type":"code","execution_count":null,"id":"03889ba4","metadata":{},"outputs":[],"source":["labs.direct_lake_schema_compare(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"2229963b","metadata":{},"source":["### Show calculated tables which have been migrated to the Direct Lake semantic model as regular tables"]},{"cell_type":"code","execution_count":null,"id":"dd537d90","metadata":{},"outputs":[],"source":["labs.list_direct_lake_model_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.12.3"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"969a29bf","metadata":{},"source":["### Import the library and set initial parameters"]},{"cell_type":"code","execution_count":null,"id":"29c923f8","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs import migration, report, directlake\n","\n","dataset_name = '' #Enter the import/DQ semantic model name\n","workspace_name = None #Enter the workspace of the import/DQ semantic model. 
It set to none it will use the current workspace.\n","new_dataset_name = '' #Enter the new Direct Lake semantic model name\n","new_dataset_workspace_name = None #Enter the workspace where the Direct Lake model will be created. If set to None it will use the current workspace.\n","lakehouse_name = None #Enter the lakehouse to be used for the Direct Lake model. If set to None it will use the lakehouse attached to the notebook.\n","lakehouse_workspace_name = None #Enter the lakehouse workspace. If set to None it will use the new_dataset_workspace_name."]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Create the [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file\n","\n","This encapsulates all of the semantic model's Power Query logic into a single file."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["migration.create_pqt_file(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"bf945d07-544c-4934-b7a6-cfdb90ca725e","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Import the Power Query Template to Dataflows Gen2\n","\n","- Open the [OneLake file explorer](https://www.microsoft.com/download/details.aspx?id=105222) and sync your files (right click -> Sync from OneLake)\n","\n","- Navigate to your lakehouse. From this window, create a new Dataflows Gen2 and import the Power Query Template file from OneLake (OneLake -> Workspace -> Lakehouse -> Files...), and publish the Dataflows Gen2.\n","\n","
\n","Important!: Make sure to create the Dataflows Gen2 from within the lakehouse window. That will ensure that all the tables automatically map to that lakehouse as the destination. Otherwise, you will have to manually map each table to its destination individually.\n","
"]},{"cell_type":"markdown","id":"9975db7d","metadata":{},"source":["### Create the Direct Lake model based on the import/DQ semantic model\n","\n","Calculated columns are not migrated to the Direct Lake model as they are not supported in Direct Lake mode."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import time\n","labs.create_blank_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","\n","time.sleep(2)\n","\n","migration.migrate_calc_tables_to_lakehouse(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_tables_columns_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_calc_tables_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name,\n"," lakehouse = lakehouse_name,\n"," lakehouse_workspace = lakehouse_workspace_name)\n","migration.migrate_model_objects_to_semantic_model(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","migration.migrate_field_parameters(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name,\n"," workspace = workspace_name,\n"," new_dataset_workspace = new_dataset_workspace_name)\n","time.sleep(2)\n","migration.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","migration.refresh_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)\n","migration.refresh_semantic_model(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"bb98bb13","metadata":{},"source":["### Show migrated/unmigrated objects"]},{"cell_type":"code","execution_count":null,"id":"5db2f22c","metadata":{},"outputs":[],"source":["migration.migration_validation(\n"," dataset = dataset_name,\n"," new_dataset = new_dataset_name, \n"," workspace = workspace_name, \n"," new_dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"fa244e9d-87c2-4a66-a7e0-be539a0ac7de","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Rebind all reports using the old semantic model to the new Direct Lake semantic model"]},{"cell_type":"code","execution_count":null,"id":"d4e867cc","metadata":{},"outputs":[],"source":["report.report_rebind_all(\n"," dataset = dataset_name,\n"," dataset_workspace = workspace_name,\n"," new_dataset = new_dataset_name,\n"," new_dataset_workpace = new_dataset_workspace_name,\n"," report_workspace = workspace_name)"]},{"cell_type":"markdown","id":"3365d20d","metadata":{},"source":["### Rebind reports one-by-one (optional)"]},{"cell_type":"code","execution_count":null,"id":"056b7180-d7ac-492c-87e7-ac7d0e4bb929","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["report_name = '' 
# Enter report name which you want to rebind to the new Direct Lake model\n","\n","report.report_rebind(\n"," report = report_name,\n"," dataset = new_dataset_name,\n"," report_workspace=workspace_name,\n"," dataset_workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"526f2327","metadata":{},"source":["### Show unsupported objects"]},{"cell_type":"code","execution_count":null,"id":"a47376d7","metadata":{},"outputs":[],"source":["dfT, dfC, dfR = directlake.show_unsupported_direct_lake_objects(dataset = dataset_name, workspace = workspace_name)\n","\n","print('Calculated Tables are not supported...')\n","display(dfT)\n","print(\"Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations\")\n","print('Calculated columns are not supported. Columns of binary data type are not supported.')\n","display(dfC)\n","print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.')\n","display(dfR)"]},{"cell_type":"markdown","id":"ed08ba4c","metadata":{},"source":["### Schema check between semantic model tables/columns and lakehouse tables/columns\n","\n","This will list any tables/columns which are in the new semantic model but do not exist in the lakehouse"]},{"cell_type":"code","execution_count":null,"id":"03889ba4","metadata":{},"outputs":[],"source":["directlake.direct_lake_schema_compare(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]},{"cell_type":"markdown","id":"2229963b","metadata":{},"source":["### Show calculated tables which have been migrated to the Direct Lake semantic model as regular tables"]},{"cell_type":"code","execution_count":null,"id":"dd537d90","metadata":{},"outputs":[],"source":["directlake.list_direct_lake_model_calc_tables(dataset = new_dataset_name, workspace = new_dataset_workspace_name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.12.3"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Model Optimization.ipynb b/notebooks/Model Optimization.ipynb index 43df2f97..0a5a448f 100644 --- a/notebooks/Model Optimization.ipynb +++ b/notebooks/Model Optimization.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import 
connect_semantic_model"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your lakehouse"]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analzyer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path = '', file_name = '')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analzyer\n","\n","This runs the [standard rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the rules and exports the results to a table in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None, export = True)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["labs.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["labs.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False, export = True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["labs.check_fallback_reason(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["labs.optimize_lakehouse_tables(tables = ['', ''], lakehouse = None, workspace = 
None)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["labs.warm_direct_lake_cache_isresident(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["labs.warm_direct_lake_cache_perspective(dataset = '', workspace = None, perspective = '', add_dependencies = True)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs._tom import connect_semantic_model\n","from sempy_labs import lakehouse as lake\n","from sempy_labs import directlake"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your lakehouse"]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the 
Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = '', workspace = None, export = 'table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analzyer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path = '', file_name = '')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analzyer\n","\n","This runs the [standard rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the rules and exports the results to a table in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = '', workspace = None, export = True)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False, export = True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["directlake.check_fallback_reason(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["lake.optimize_lakehouse_tables(tables = ['', ''], lakehouse = None, workspace = None)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_isresident(dataset = '', workspace = None)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular 
Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_perspective(dataset = '', workspace = None, perspective = '', add_dependencies = True)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Query Scale Out.ipynb b/notebooks/Query Scale Out.ipynb index 131a967b..a29de474 100644 --- a/notebooks/Query Scale Out.ipynb +++ b/notebooks/Query Scale Out.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"b195eae8","metadata":{},"source":["### Import the library and set the initial parameters"]},{"cell_type":"code","execution_count":null,"id":"1344e286","metadata":{},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","dataset = '' # Enter your dataset name\n","workspace = None # Enter your workspace name (if set to None it will use the workspace in which the notebook is running)"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### View [Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out) (QSO) settings"]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.list_qso_settings(dataset = dataset, workspace = workspace )"]},{"cell_type":"markdown","id":"b0717cbb","metadata":{},"source":["### [Configure Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out-configure)\n","Setting 'auto_sync' to True will ensure that the semantic model automatically syncs read-only replicas. Setting this to False will necessitate syncing the replicas (i.e. 
via the qso_sync function).\n","\n","The 'max_read_only_replicas' is the maximum number of read-only replicas for the semantic model (0-64, -1 for automatic number of replicas).\n"]},{"cell_type":"code","execution_count":null,"id":"ec37dd14","metadata":{},"outputs":[],"source":["labs.set_qso(dataset = dataset, auto_sync = False, max_read_only_replicas = -1, workspace = workspace)"]},{"cell_type":"markdown","id":"5d6beadd","metadata":{},"source":["### Sync Query Scale Out replicas"]},{"cell_type":"code","execution_count":null,"id":"7ca10963","metadata":{},"outputs":[],"source":["labs.qso_sync(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"719f428f","metadata":{},"source":["### Check Query Scale Out Sync Status"]},{"cell_type":"code","execution_count":null,"id":"db6f197c","metadata":{},"outputs":[],"source":["dfA, dfB = labs.qso_sync_status(dataset = dataset, workspace = workspace)\n","display(dfA)\n","display(dfB)"]},{"cell_type":"markdown","id":"e92cdf34","metadata":{},"source":["### Disable Query Scale Out"]},{"cell_type":"code","execution_count":null,"id":"0624d649","metadata":{},"outputs":[],"source":["labs.disable_qso(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"786d89bc","metadata":{},"source":["### Enable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"d521b228","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Large', workspace = workspace)"]},{"cell_type":"markdown","id":"e90c20e9","metadata":{},"source":["### Disable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"433220b2","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Small', workspace = workspace)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"b195eae8","metadata":{},"source":["### Import the library and set the initial parameters"]},{"cell_type":"code","execution_count":null,"id":"1344e286","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","dataset = '' # Enter your dataset name\n","workspace = None # Enter your workspace name (if set to None it will use the workspace in which the notebook is running)"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### 
View [Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out) (QSO) settings"]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.list_qso_settings(dataset = dataset, workspace = workspace )"]},{"cell_type":"markdown","id":"b0717cbb","metadata":{},"source":["### [Configure Query Scale Out](https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out-configure)\n","Setting 'auto_sync' to True will ensure that the semantic model automatically syncs read-only replicas. Setting this to False will necessitate syncing the replicas (i.e. via the qso_sync function).\n","\n","The 'max_read_only_replicas' is the maximum number of read-only replicas for the semantic model (0-64, -1 for automatic number of replicas).\n"]},{"cell_type":"code","execution_count":null,"id":"ec37dd14","metadata":{},"outputs":[],"source":["labs.set_qso(dataset = dataset, auto_sync = False, max_read_only_replicas = -1, workspace = workspace)"]},{"cell_type":"markdown","id":"5d6beadd","metadata":{},"source":["### Sync Query Scale Out replicas"]},{"cell_type":"code","execution_count":null,"id":"7ca10963","metadata":{},"outputs":[],"source":["labs.qso_sync(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"719f428f","metadata":{},"source":["### Check Query Scale Out Sync Status"]},{"cell_type":"code","execution_count":null,"id":"db6f197c","metadata":{},"outputs":[],"source":["dfA, dfB = labs.qso_sync_status(dataset = dataset, workspace = workspace)\n","display(dfA)\n","display(dfB)"]},{"cell_type":"markdown","id":"e92cdf34","metadata":{},"source":["### Disable Query Scale Out"]},{"cell_type":"code","execution_count":null,"id":"0624d649","metadata":{},"outputs":[],"source":["labs.disable_qso(dataset = dataset, workspace = workspace)"]},{"cell_type":"markdown","id":"786d89bc","metadata":{},"source":["### Enable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"d521b228","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Large', workspace = workspace)"]},{"cell_type":"markdown","id":"e90c20e9","metadata":{},"source":["### Disable large semantic model format"]},{"cell_type":"code","execution_count":null,"id":"433220b2","metadata":{},"outputs":[],"source":["labs.set_semantic_model_storage_format(dataset = dataset, storage_format = 'Small', workspace = workspace)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/notebooks/Tabular Object Model.ipynb b/notebooks/Tabular Object Model.ipynb index 595bf886..fe6df427 100644 --- a/notebooks/Tabular Object Model.ipynb +++ b/notebooks/Tabular Object Model.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release 
notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy.labs as labs\n","from sempy.labs.TOM import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. Check the [documentation](https://github.com/m-kovalsky/fabric_cat_tools) to see all available parameters for each function."]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name = t.Name, measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name = t.Name, measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name ='Product', column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," tom.add_data_column(table_name = 'Segment', column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n","\n"," 
tom.add_calculated_column(table_name = 'Internet Sales', column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name = t.Name, column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name = 'Geography', hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name = t.Name, hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table = 'Internet Sales', from_column = 'ProductKey',\n"," to_table = 'Product', to_column = 'ProductKey', \n"," from_cardinality = 'Many', to_cardinality = 'One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = 'let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_entity_partition(table_name = table_name, entity_name = table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as 
tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name = table_name, expression = \"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name = table_name, column_name = 'Color', source_column = \"'Product'[Color]\", data_type = 'String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name = 'Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) on the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(role_name ='Reader', table_name = 'Product', filter_expression = \"'Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name = r.Name, table_name = 'Product', filter_expression = \"'Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) on the semantic model\n","This adds object level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name = 'Reader', table_name = 'Product', column_name = 'Size', permission = 'None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name = r.Name, table_name = t.Name, column_name = 'Size', permission = 'None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name = 'MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'MyCalcGroup':\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language = 'it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language = 'it-IT', property = 'Name', value = 'Prodotto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name = 'Parameter', objects = \"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove object(s) from a semantic model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object = t.Columns['Size'])\n"," tom.remove_object(object = t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object = tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object = tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," 
print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(t.Name + ' : ' + str(rc))\n"," for c in t.Columns:\n"," col_size = tom.total_size(column = c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(full_name + ' : ' + h.Name)"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(full_name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(t.Name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} 
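The notebook above calls `add_m_partition` with a placeholder expression (`'let....'`). As a purely illustrative, hedged sketch of what a complete call might look like: the `add_table`/`add_m_partition` calls and the `connect_semantic_model` pattern are taken from the notebook, while the server, database and source table names in the M query below are hypothetical and not from this repository.

```python
# Illustrative sketch only; substitute your own Power Query (M) source.
from sempy_labs._tom import connect_semantic_model

dataset = ''      # Enter dataset name
workspace = None  # Enter workspace name

# A complete M expression in place of the 'let....' placeholder used in the notebook cell.
m_expression = """
let
    Source = Sql.Database("myserver.database.windows.net", "AdventureWorksDW"),
    SalesData = Source{[Schema = "dbo", Item = "FactInternetSales"]}[Data]
in
    SalesData
"""

with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:
    table_name = 'Sales'
    tom.add_table(name = table_name)
    tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = m_expression)
```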
+{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs) to see the latest version.\n","\n","Check [here](https://github.com/microsoft/semantic-link-labs/releases) for the library's release notes."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install \"https://raw.githubusercontent.com/microsoft/semantic-link-labs/main/semantic-link-labs-0.4.1-py3-none-any.whl\""]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Connect to the [Tabular Object Model](https://learn.microsoft.com/analysis-services/tom/introduction-to-the-tabular-object-model-tom-in-analysis-services-amo?view=asallproducts-allversions) ([TOM](https://learn.microsoft.com/dotnet/api/microsoft.analysisservices.tabular.model?view=analysisservices-dotnet))\n","Setting the 'readonly' property to False enables read/write mode. This allows changes to be made to the semantic model."]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs._tom import connect_semantic_model\n","\n","dataset = '' # Enter dataset name\n","workspace = None # Enter workspace name\n","\n","with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," print(t.Name)"]},{"cell_type":"markdown","id":"fc6b277e","metadata":{},"source":["### Make changes to a semantic model using custom functions\n","Note that the custom functions have additional optional parameters (which may not be used in the examples below) for adding properties to model objects. 
Check the [documentation](https://github.com/m-kovalsky/fabric_cat_tools) to see all available parameters for each function."]},{"cell_type":"markdown","id":"402a477c","metadata":{},"source":["#### Add measure(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"bdaaaa5c","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name ='Internet Sales', measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\") "]},{"cell_type":"code","execution_count":null,"id":"a53a544b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Internet Sales':\n"," tom.add_measure(table_name = t.Name, measure_name = 'Sales Amount', expression = \"SUM('Internet Sales'[SalesAmount])\")\n"," tom.add_measure(table_name = t.Name, measure_name = 'Order Quantity', expression = \"SUM('Internet Sales'[OrderQty])\")"]},{"cell_type":"markdown","id":"1cb1632f","metadata":{},"source":["#### Add column(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"81a22749","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_data_column(table_name ='Product', column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," tom.add_data_column(table_name = 'Segment', column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n","\n"," tom.add_calculated_column(table_name = 'Internet Sales', column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"code","execution_count":null,"id":"053b6516","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Size Range', source_column = 'SizeRange', data_type = 'Int64')\n"," elif t.Name == 'Segment':\n"," tom.add_data_column(table_name = t.Name, column_name = 'Summary Segment', source_column = 'SummarySegment', data_type = 'String')\n"," elif t.Name == 'Internet Sales':\n"," tom.add_calculated_column(table_name = t.Name, column_name = 'GrossMargin', expression = \"'Internet Sales'[SalesAmount] - 'Internet Sales'[ProductCost]\", data_type = 'Decimal')"]},{"cell_type":"markdown","id":"f53dcca7","metadata":{},"source":["#### Add hierarchies to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"a9309e23","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_hierarchy(table_name = 'Geography', hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 'City'])"]},{"cell_type":"code","execution_count":null,"id":"a04281ce","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Geography':\n"," tom.add_hierarchy(table_name = t.Name, hierarchy_name = 'Geo Hierarchy', levels = ['Continent', 'Country', 'State', 
'City'])"]},{"cell_type":"markdown","id":"47c06a4f","metadata":{},"source":["#### Add relationship(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"e8cd7bbf","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_relationship(\n"," from_table = 'Internet Sales', from_column = 'ProductKey',\n"," to_table = 'Product', to_column = 'ProductKey', \n"," from_cardinality = 'Many', to_cardinality = 'One')"]},{"cell_type":"markdown","id":"3cc7f11e","metadata":{},"source":["#### Add a table with an M partition to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0f5dd66a","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_m_partition(table_name = table_name, partition_name = table_name, expression = 'let....')"]},{"cell_type":"markdown","id":"ea389123","metadata":{},"source":["#### Add a table with an entity partition to a Direct Lake semantic model "]},{"cell_type":"code","execution_count":null,"id":"f75387d1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_table(name = table_name)\n"," tom.add_entity_partition(table_name = table_name, entity_name = table_name)"]},{"cell_type":"markdown","id":"e74d0f54","metadata":{},"source":["#### Add a calculated table (and columns) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"934f7315","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," table_name = 'Sales'\n"," tom.add_calculated_table(name = table_name, expression = \"DISTINCT('Product'[Color])\")\n"," tom.add_calculated_table_column(table_name = table_name, column_name = 'Color', source_column = \"'Product'[Color]\", data_type = 'String')"]},{"cell_type":"markdown","id":"0e7088b7","metadata":{},"source":["#### Add role(s) to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"ad60ebb9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_role(role_name = 'Reader')"]},{"cell_type":"markdown","id":"c541f81a","metadata":{},"source":["#### Set row level security (RLS) on the semantic model\n","This adds row level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"98603a08","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_rls(role_name ='Reader', table_name = 'Product', filter_expression = \"'Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"code","execution_count":null,"id":"effea009","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," tom.set_rls(role_name = r.Name, table_name = 'Product', filter_expression = \"'Product'[Color] = \\\"Blue\\\"\")"]},{"cell_type":"markdown","id":"7fa7a03c","metadata":{},"source":["#### Set object level security (OLS) on the semantic model\n","This adds object level security (or updates it if it already exists)"]},{"cell_type":"code","execution_count":null,"id":"dd0def9d","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_ols(role_name = 'Reader', table_name = 'Product', column_name = 'Size', permission = 'None')"]},{"cell_type":"code","execution_count":null,"id":"7a389dc7","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for r in tom.model.Roles:\n"," if r.Name == 'Reader':\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.set_ols(role_name = r.Name, table_name = t.Name, column_name = 'Size', permission = 'None')"]},{"cell_type":"markdown","id":"d0f7ccd1","metadata":{},"source":["#### Add calculation groups and calculation items to the semantic model"]},{"cell_type":"code","execution_count":null,"id":"97f4708b","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_group(name = 'MyCalcGroup')"]},{"cell_type":"code","execution_count":null,"id":"fef68832","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = 'MyCalcGroup', calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"code","execution_count":null,"id":"c7653dcc","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'MyCalcGroup':\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'YTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESYTD('Calendar'[CalendarDate]))\")\n"," tom.add_calculation_item(table_name = t.Name, calculation_item_name = 'MTD', expression = \"CALCULATE(SELECTEDMEASURE(), DATESMTD('Calendar'[CalendarDate]))\")"]},{"cell_type":"markdown","id":"c6450c74","metadata":{},"source":["#### Add translations to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"2b616b90","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_translation(language = 'it-IT')"]},{"cell_type":"code","execution_count":null,"id":"dc24c200","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_translation(object = tom.model.Tables['Product'], language = 'it-IT', property = 'Name', value = 'Prodotto')"]},{"cell_type":"markdown","id":"3048cc95","metadata":{},"source":["#### Add a [Field Parameter](https://learn.microsoft.com/power-bi/create-reports/power-bi-field-parameters) to a semantic model"]},{"cell_type":"code","execution_count":null,"id":"0a94af94","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.add_field_parameter(table_name = 'Parameter', objects = \"'Product'[Color], [Sales Amount], 'Geography'[Country]\")"]},{"cell_type":"markdown","id":"95aac09a","metadata":{},"source":["#### Remove object(s) from a semantic 
model"]},{"cell_type":"code","execution_count":null,"id":"1e2572a8","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," if t.Name == 'Product':\n"," tom.remove_object(object = t.Columns['Size'])\n"," tom.remove_object(object = t.Hierarchies['Product Hierarchy'])"]},{"cell_type":"code","execution_count":null,"id":"bc453177","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.remove_object(object = tom.model.Tables['Product'].Columns['Size'])\n"," tom.remove_object(object = tom.model.Tables['Product'].Hierarchies['Product Hierarchy'])"]},{"cell_type":"markdown","id":"e0d0cb9e","metadata":{},"source":["### Custom functions to loop through non-top-level objects in a semantic model"]},{"cell_type":"code","execution_count":null,"id":"cbe3b1a3","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," print(c.Name)"]},{"cell_type":"code","execution_count":null,"id":"3f643e66","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for m in tom.all_measures():\n"," print(m.Name)"]},{"cell_type":"code","execution_count":null,"id":"ed1cde0f","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for p in tom.all_partitions():\n"," print(p.Name)"]},{"cell_type":"code","execution_count":null,"id":"f48014ae","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for h in tom.all_hierarchies():\n"," print(h.Name)"]},{"cell_type":"code","execution_count":null,"id":"9f5e7b72","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for ci in tom.all_calculation_items():\n"," print(ci.Name)"]},{"cell_type":"code","execution_count":null,"id":"3cd9ebc1","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for l in tom.all_levels():\n"," print(l.Name)"]},{"cell_type":"code","execution_count":null,"id":"12c58bad","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," for rls in tom.all_rls():\n"," print(rls.Name)"]},{"cell_type":"markdown","id":"1a294bd2","metadata":{},"source":["### See Vertipaq Analyzer stats"]},{"cell_type":"code","execution_count":null,"id":"469660e9","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom:\n"," tom.set_vertipaq_annotations()\n","\n"," for t in tom.model.Tables:\n"," rc = tom.row_count(object = t)\n"," print(t.Name + ' : ' + str(rc))\n"," for c in t.Columns:\n"," col_size = tom.total_size(column = c)\n"," print(labs.format_dax_object_name(t.Name, c.Name) + ' : ' + str(col_size))"]},{"cell_type":"markdown","id":"1ab26dfd","metadata":{},"source":["### 'UsedIn' functions"]},{"cell_type":"code","execution_count":null,"id":"412bf287","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, 
c.Name)\n"," for h in tom.used_in_hierarchies(column = c):\n"," print(full_name + ' : ' + h.Name)"]},{"cell_type":"code","execution_count":null,"id":"76556900","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for c in tom.all_columns():\n"," full_name = labs.format_dax_object_name(c.Parent.Name, c.Name)\n"," for r in tom.used_in_relationships(object = c):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(full_name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"4d9ec24e","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," for t in tom.model.Tables:\n"," for r in tom.used_in_relationships(object = t):\n"," rel_name = labs.create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name)\n"," print(t.Name + ' : ' + rel_name)"]},{"cell_type":"code","execution_count":null,"id":"82251336","metadata":{},"outputs":[],"source":["with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom:\n"," dep = labs.get_model_calc_dependencies(dataset = dataset, workspace=workspace)\n"," for o in tom.used_in_rls(object = tom.model.Tables['Product'].Columns['Color'], dependencies=dep):\n"," print(o.Name)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8cdf0c15 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name="semantic-link-labs" +authors = [ + { name = "Microsoft Corporation" }, +] +version="0.4.1" +description="Semantic Link Labs project" +requires-python=">=3.10,<3.12" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", + "Framework :: Jupyter" +] +license= { text = "MIT License" } + +dependencies = [ + "semantic-link-sempy>=0.7.5", + "anytree", + "powerbiclient" +] + +[project.optional-dependencies] +test = [ + "pytest>=8.2.1", +] + +[project.urls] +Repository = "https://github.com/microsoft/semantic-link-labs.git" + +[[tool.mypy.overrides]] +module = "sempy.*,Microsoft.*,System.*,anytree.*,powerbiclient.*,synapse.ml.services.*" +ignore_missing_imports = true \ No newline at end of file diff --git a/sempy_labs/AI.py b/sempy_labs/AI.py deleted file mode 100644 index e70aaa30..00000000 --- a/sempy_labs/AI.py +++ /dev/null @@ -1,418 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from synapse.ml.services.openai import OpenAICompletion -from pyspark.sql.functions import col -from pyspark.sql import SparkSession -from typing import List, Optional, Union - -def optimize_semantic_model(dataset: str, workspace: Optional[str] = None): - - from .ModelBPA import run_model_bpa - from .Fallback import 
check_fallback_reason - from .HelperFunctions import format_dax_object_name - - modelBPA = run_model_bpa(dataset = dataset, workspace = workspace, return_dataframe = True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended = True) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC['Total Size'] = dfC['Total Size'].astype('int') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - - modelBPA_col = modelBPA[modelBPA['Object Type'] == 'Column'] - modelBPA_col = pd.merge(modelBPA_col, dfC[['Column Object', 'Total Size']], left_on = 'Object Name', right_on = 'Column Object', how = 'left') - - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - - if isDirectLake: - fallback = check_fallback_reason(dataset = dataset, workspace = workspace) - fallback_filt = fallback[fallback['FallbackReasonID']== 2] - - if len(fallback_filt) > 0: - print(f"The '{dataset}' semantic model is a Direct Lake semantic model which contains views. Since views always fall back to DirectQuery, it is recommended to only use lakehouse tables and not views.") - - # Potential model reduction estimate - ruleNames = ['Remove unnecessary columns','Set IsAvailableInMdx to false on non-attribute columns'] - - for rule in ruleNames: - df = modelBPA_col[modelBPA_col['Rule Name'] == rule] - df_filt = df[['Object Name', 'Total Size']].sort_values(by='Total Size', ascending=False) - totSize = df['Total Size'].sum() - if len(df_filt) > 0: - print(f"Potential savings of {totSize} bytes from following the '{rule}' rule.") - display(df_filt) - else: - print(f"The '{rule}' rule has been followed.") - - -def generate_measure_descriptions(dataset: str, measures: Union[str,List[str]], gpt_model: Optional[str] = 'gpt-35-turbo', workspace: Optional[str] = None): - - service_name = 'synapseml-openai' - - if isinstance(measures, str): - measures = [measures] - - validModels = ['gpt-35-turbo', 'gpt-35-turbo-16k', 'gpt-4'] - if gpt_model not in validModels: - print(f"The '{gpt_model}' model is not a valid model. Enter a gpt_model from this list: {validModels}.") - return - - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - - if measures is not None: - dfM_filt = dfM[dfM['Measure Name'].isin(measures)] - else: - dfM_filt = dfM - - df = dfM_filt[['Table Name', 'Measure Name', 'Measure Expression']] - - df['prompt'] = f"The following is DAX code used by Microsoft Power BI. 
Please explain this code in simple terms:" +df['Measure Expression'] - - # Generate new column in df dataframe which has the AI-generated descriptions - completion = { - OpenAICompletion() - .setDeploymentName(gpt_model) - .setMaxTokens(200) - .setCustomServiceName(service_name) - .setPromptCol('prompt') - .setErrorCol('error') - .setOutputCol('completions') - } - - completed_df = completion.transform(df).cache() - completed_df.select( - col('prompt'), - col('error'), - col('completions.choices.text').getItem(0).alias('text'), - ) - - # Update the model to use the new descriptions - tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) - m = tom_server.Databases.GetByName(dataset).Model - - #for t in m.Tables: - #tName = t.Name - #for ms in t.Measures: - #mName = ms.Name - #mDesc = promptValue - - #m.SaveChanges() - -def generate_aggs(dataset: str, table_name: str, columns: Union[str,List[str]], workspace: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - from .HelperFunctions import get_direct_lake_sql_endpoint, create_abfss_path, format_dax_object_name, resolve_lakehouse_id - - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - import System - - #columns = { - #'SalesAmount': 'Sum', - #'ProductKey': 'GroupBy', - #'OrderDateKey': 'GroupBy' - #} - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if lakehouse_workspace == None: - lakehouse_workspace = workspace - lakehouse_workspace_id = workspace_id - else: - lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) - - if isinstance(columns, str): - columns = [columns] - - columnValues = columns.keys() - - aggTypes = ['Sum', 'Count', 'Min', 'Max', 'GroupBy'] - aggTypesAggregate = ['Sum', 'Count', 'Min', 'Max'] - numericTypes = ['Int64', 'Double', 'Decimal'] - - if any(value not in aggTypes for value in columns.values()): - print(f"Invalid aggregation type(s) have been specified in the 'columns' parameter. Valid aggregation types: {aggTypes}.") - return - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode. This function is only relevant for Direct Lake semantic models.") - return - - dfC_filtT = dfC[dfC['Table Name'] == table_name] - - if len(dfC_filtT) == 0: - print(f"The '{table_name}' table does not exist in the '{dataset}' semantic model within the '{workspace}' workspace.") - return - - dfC_filt = dfC[(dfC['Table Name'] == table_name) & (dfC['Column Name'].isin(columnValues))] - - if len(columns) != len(dfC_filt): - print(f"Columns listed in '{columnValues}' do not exist in the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace.") - return - - # Check if doing sum/count/min/max etc. on a non-number column - for col,agg in columns.items(): - dfC_col = dfC_filt[dfC_filt['Column Name'] == col] - dataType = dfC_col['Data Type'].iloc[0] - if agg in aggTypesAggregate and dataType not in numericTypes: - print(f"The '{col}' column in the '{table_name}' table is of '{dataType}' data type. 
Only columns of '{numericTypes}' data types can be aggregated as '{aggTypesAggregate}' aggregation types.") - return - - # Create/update lakehouse delta agg table - aggSuffix = '_agg' - aggTableName = f"{table_name}{aggSuffix}" - aggLakeTName = aggTableName.lower().replace(' ','_') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Table Name'] == table_name] - lakeTName = dfP_filt['Query'].iloc[0] - - sqlEndpointId = get_direct_lake_sql_endpoint(dataset = dataset, workspace = workspace) - - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] - - if len(dfI_filt) == 0: - print(f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter.") - return - - lakehouseName = dfI_filt['Display Name'].iloc[0] - lakehouse_id = resolve_lakehouse_id(lakehouse = lakehouseName, workspace = lakehouse_workspace) - - # Generate SQL query - query = 'SELECT' - groupBy = '\nGROUP BY' - for col, agg in columns.items(): - colFilt = dfC_filt[dfC_filt['Column Name'] == col] - sourceCol = colFilt['Source'].iloc[0] - - if agg == 'GroupBy': - query = f"{query}\n{sourceCol}," - groupBy = f"{groupBy}\n{sourceCol}," - else: - query = f"{query}\n{agg}({sourceCol}) AS {sourceCol}," - - query = query[:-1] - - spark = SparkSession.builder.getOrCreate() - fromTablePath = create_abfss_path(lakehouse_id=lakehouse_id, lakehouse_workspace_id=lakehouse_workspace_id, delta_table_name=lakeTName) - df = spark.read.format("delta").load(fromTablePath) - tempTableName = 'delta_table_' + lakeTName - df.createOrReplaceTempView(tempTableName) - sqlQuery = f"{query} \n FROM {tempTableName} {groupBy}" - - sqlQuery = sqlQuery[:-1] - print(sqlQuery) - - # Save query to spark dataframe - spark_df = spark.sql(sqlQuery) - f"\nCreating/updating the '{aggLakeTName}' table in the lakehouse..." - # Write spark dataframe to delta table - aggFilePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = lakehouse_workspace_id, delta_table_name = aggLakeTName) - spark_df.write.mode('overwrite').format('delta').save(aggFilePath) - f"The '{aggLakeTName}' table has been created/updated in the lakehouse." - - # Create/update semantic model agg table - tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) - m = tom_server.Databases.GetByName(dataset).Model - f"\nUpdating the '{dataset}' semantic model..." 
- dfC_agg = dfC[dfC['Table Name'] == aggTableName] - - if len(dfC_agg) == 0: - print(f"Creating the '{aggTableName}' table...") - exp = m.Expressions['DatabaseQuery'] - tbl = TOM.Table() - tbl.Name = aggTableName - tbl.IsHidden = True - - ep = TOM.EntityPartitionSource() - ep.Name = aggTableName - ep.EntityName = aggLakeTName - ep.ExpressionSource = exp - - part = TOM.Partition() - part.Name = aggTableName - part.Source = ep - part.Mode = TOM.ModeType.DirectLake - - tbl.Partitions.Add(part) - - for i, r in dfC_filt.iterrows(): - scName = r['Source'] - cName = r['Column Name'] - dType = r['Data Type'] - - col = TOM.DataColumn() - col.Name = cName - col.IsHidden = True - col.SourceColumn = scName - col.DataType = System.Enum.Parse(TOM.DataType, dType) - - tbl.Columns.Add(col) - print(f"The '{aggTableName}'[{cName}] column has been added to the '{dataset}' semantic model.") - - m.Tables.Add(tbl) - print(f"The '{aggTableName}' table has been added to the '{dataset}' semantic model.") - else: - print(f"Updating the '{aggTableName}' table's columns...") - # Remove existing columns - for t in m.Tables: - tName = t.Name - for c in t.Columns: - cName = c.Name - if t.Name == aggTableName: - m.Tables[tName].Columns.Remove(cName) - # Add columns - for i, r in dfC_filt.iterrows(): - scName = r['Source'] - cName = r['Column Name'] - dType = r['Data Type'] - - col = TOM.DataColumn() - col.Name = cName - col.IsHidden = True - col.SourceColumn = scName - col.DataType = System.Enum.Parse(TOM.DataType, dType) - - m.Tables[aggTableName].Columns.Add(col) - print(f"The '{aggTableName}'[{cName}] column has been added.") - - # Create relationships - relMap = { - 'm': 'Many', - '1': 'One', - '0': 'None' - } - - print(f"\nGenerating necessary relationships...") - for i, r in dfR.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - cfb = r['Cross Filtering Behavior'] - sfb = r['Security Filtering Behavior'] - mult = r['Multiplicity'] - - crossFB = System.Enum.Parse(TOM.CrossFilteringBehavior,cfb) - secFB = System.Enum.Parse(TOM.SecurityFilteringBehavior,sfb) - fromCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, relMap.get(mult[0])) - toCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, relMap.get(mult[-1])) - - rel = TOM.SingleColumnRelationship() - rel.FromCardinality = fromCardinality - rel.ToCardinality = toCardinality - rel.IsActive = r['Active'] - rel.CrossFilteringBehavior = crossFB - rel.SecurityFilteringBehavior = secFB - rel.RelyOnReferentialIntegrity = r['Rely On Referential Integrity'] - - if fromTable == table_name: - try: - rel.FromColumn = m.Tables[aggTableName].Columns[fromColumn] - m.Relationships.Add(rel) - print(f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has been added.") - except: - print(f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has not been created.") - elif toTable == table_name: - try: - rel.ToColumn = m.Tables[aggTableName].Columns[toColumn] - m.Relationships.Add(rel) - print(f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has been added.") - except: - print(f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has not been created.") - f"Relationship creation is complete." - - # Create IF measure - f"\nCreating measure to check if the agg table can be used..." 
- aggChecker = 'IF(' - dfR_filt = dfR[(dfR['From Table'] == table_name) & (~dfR['From Column'].isin(columnValues))] - - for i, r in dfR_filt.iterrows(): - toTable = r['To Table'] - aggChecker = f"{aggChecker}\nISCROSSFILTERED('{toTable}') ||" - - aggChecker = aggChecker[:-3] - aggChecker = f"{aggChecker},1,0)" - print(aggChecker) - - # Todo: add IFISFILTERED clause for columns - f"\n Creating the base measures in the agg table..." - # Create base agg measures - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = """ - SELECT - [TABLE] AS [Table Name] - ,[OBJECT] AS [Object Name] - ,[OBJECT_TYPE] AS [Object Type] - ,[REFERENCED_TABLE] AS [Referenced Table] - ,[REFERENCED_OBJECT] AS [Referenced Object] - ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] - FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY - WHERE [OBJECT_TYPE] = 'MEASURE' - """) - - baseMeasures = dep[(dep['Referenced Object Type'] == 'COLUMN') & (dep['Referenced Table'] == table_name) & (dep['Referenced Object'].isin(columnValues))] - for i, r in baseMeasures.iterrows(): - tName = r['Table Name'] - mName = r['Object Name'] - cName = r['Referenced Object'] - dfM_filt = dfM[dfM['Measure Name'] == mName] - expr = dfM_filt['Measure Expression'].iloc[0] - - colFQNonAgg = format_dax_object_name(tName, cName) - colFQAgg = format_dax_object_name(aggTableName, cName) - colNQNonAgg = f"{tName}[{cName}]" - - if ' ' in tName: - newExpr = expr.replace(colFQNonAgg,colFQAgg) - else: - newExpr = expr.replace(colFQNonAgg, colFQAgg).replace(colNQNonAgg,colFQAgg) - print(expr) - print(newExpr) - - aggMName = mName + aggSuffix - measure = TOM.Measure() - measure.Name = aggMName - measure.IsHidden = True - measure.Expression = newExpr - m.Tables[aggTableName].Measures.Add(measure) - f"The '{aggMName}' measure has been created in the '{aggTableName}' table." 
- - # Update base detail measures - - #m.SaveChanges() - - - - - - - - - - -# Identify views used within Direct Lake model -#workspace = 'MK Demo 6' -#lakehouse = 'MyLakehouse' -#dataset = 'MigrationTest' -#lakehouse_workspace = workspace - -#dfView = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'View Name']) -#dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) -#isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - -#spark = SparkSession.builder.getOrCreate() -#views = spark.sql(f"SHOW VIEWS IN {lakehouse}").collect() -#for view in views: -# viewName = view['viewName'] -# isTemporary = view['isTemporary'] -# new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'View Name': viewName} -# dfView = pd.concat([dfView, pd.DataFrame(new_data, index=[0])], ignore_index=True) -#dfView -#lakeT = get_lakehouse_tables(lakehouse, lakehouse_workspace) -#if not dfP['Query'].isin(lakeT['Table Name'].values): -# if - diff --git a/sempy_labs/Connections.py b/sempy_labs/Connections.py deleted file mode 100644 index fe97202f..00000000 --- a/sempy_labs/Connections.py +++ /dev/null @@ -1,168 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from typing import List, Optional, Union - -def create_connection_cloud(name: str, server_name: str, database_name: str, user_name: str, password: str, privacy_level: str): - - #https://review.learn.microsoft.com/en-us/rest/api/fabric/core/connections/create-connection?branch=features%2Fdmts&tabs=HTTP - - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 'Skip Test Connection']) - - client = fabric.FabricRestClient() - - request_body = { - "connectivityType": "ShareableCloud", - "name": name, - "connectionDetails": { - "type": "SQL", - "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] - }, - "privacyLevel": privacy_level, - "credentialDetails": { - "singleSignOnType": "None", - "connectionEncryption": "NotEncrypted", - "skipTestConnection": False, - "credentials": { - "credentialType": "Basic", - "username": user_name, - "password": password - } - } - } - - response = client.post(f"/v1/connections",json=request_body) - - if response.status_code == 200: - o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) - - return df - else: - print(response.status_code) - -def create_connection_on_prem(name: str, gateway_id: str, server_name: str, database_name: str, credentials: str, privacy_level: str): - - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Gateway ID', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 
'Skip Test Connection']) - - client = fabric.FabricRestClient() - - request_body = { - "connectivityType": "OnPremisesDataGateway", - "gatewayId": gateway_id, - "name": name, - "connectionDetails": { - "type": "SQL", - "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] - }, - "privacyLevel": privacy_level, - "credentialDetails": { - "singleSignOnType": "None", - "connectionEncryption": "NotEncrypted", - "skipTestConnection": False, - "credentials": { - "credentialType": "Windows", - "values": [ - { - "gatewayId": gateway_id, - "credentials": credentials - } - ] - } - } - } - - response = client.post(f"/v1/connections",json=request_body) - - if response.status_code == 200: - o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Gateway ID': o['gatewayId'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) - - return df - else: - print(response.status_code) - -def create_connection_vnet(name: str, gateway_id: str, server_name: str, database_name: str, user_name: str, password: str, privacy_level: str): - - df = pd.DataFrame(columns=['Connection ID', 'Connection Name', 'Gateway ID', 'Connectivity Type', 'Connection Type', 'Connection Path', 'Privacy Level', 'Credential Type', 'Single Sign On Type', 'Connection Encryption', 'Skip Test Connection']) - - client = fabric.FabricRestClient() - - request_body = { - "connectivityType": "VirtualNetworkDataGateway", - "gatewayId": gateway_id, - "name": name, - "connectionDetails": { - "type": "SQL", - "parameters": [ - { - "name": "server", - "value": server_name - }, - { - "name": "database", - "value": database_name - } - ] - }, - "privacyLevel": privacy_level, - "credentialDetails": { - "singleSignOnType": "None", - "connectionEncryption": "Encrypted", - "skipTestConnection": False, - "credentials": { - "credentialType": "Basic", - "username": user_name, - "password": password - } - } - } - - response = client.post(f"/v1/connections",json=request_body) - - if response.status_code == 200: - o = response.json() - new_data = {'Connection Id': o['id'], 'Connection Name': o['name'], 'Gateway ID': o['gatewayId'], 'Connectivity Type': o['connectivityType'], - 'Connection Type': o['connectionDetails']['type'], 'Connection Path': o['connectionDetails']['path'], 'Privacy Level': o['privacyLevel'], - 'Credential Type': o['credentialDetails']['credentialType'], 'Single Sign On Type': o['credentialDetails']['singleSignOnType'], - 'Connection Encryption': o['credentialDetails']['connectionEncryption'], 'Skip Test Connection': o['credentialDetails']['skipTestConnection'] - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Skip Test Connection'] = df['Skip Test Connection'].astype(bool) - - return df - else: - print(response.status_code) \ No newline at end of file diff --git a/sempy_labs/CreateBlankSemanticModel.py b/sempy_labs/CreateBlankSemanticModel.py deleted 
file mode 100644 index 80ada03f..00000000 --- a/sempy_labs/CreateBlankSemanticModel.py +++ /dev/null @@ -1,60 +0,0 @@ -import sempy -import sempy.fabric as fabric -from typing import List, Optional, Union - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def create_blank_semantic_model(dataset: str, compatibility_level: Optional[int] = 1605, workspace: Optional[str] = None): - - """ - Creates a new blank semantic model (no tables/columns etc.). - - Parameters - ---------- - dataset : str - Name of the semantic model. - compatibility_level : int - The compatibility level of the semantic model. - Defaults to 1605. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if compatibility_level < 1500: - print(f"{red_dot} Compatiblity level must be at least 1500.") - return - - tmsl = f''' - {{ - "createOrReplace": {{ - "object": {{ - "database": '{dataset}' - }}, - "database": {{ - "name": '{dataset}', - "compatibilityLevel": {compatibility_level}, - "model": {{ - "culture": "en-US", - "defaultPowerBIDataSourceVersion": "powerBI_V3" - }} - }} - }} - }} - ''' - - fabric.execute_tmsl(script = tmsl, workspace = workspace) - - return print(f"{green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace.") \ No newline at end of file diff --git a/sempy_labs/CreatePQTFile.py b/sempy_labs/CreatePQTFile.py deleted file mode 100644 index f3303b2f..00000000 --- a/sempy_labs/CreatePQTFile.py +++ /dev/null @@ -1,191 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, os, shutil -import xml.etree.ElementTree as ET -from .ListFunctions import list_tables -from .Lakehouse import lakehouse_attached -from sempy._utils._log import log -from typing import List, Optional, Union - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def create_pqt_file(dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None): - - """ - Dynamically generates a [Power Query Template](https://learn.microsoft.com/power-query/power-query-template) file based on the semantic model. The .pqt file is saved within the Files section of your lakehouse. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - file_name : str, default=None - The name of the Power Query Template file to be generated. - Defaults to None which resolves to 'PowerQueryTemplate'. - - Returns - ------- - - """ - - if file_name is None: - file_name = 'PowerQueryTemplate' - - lakeAttach = lakehouse_attached() - - if lakeAttach == False: - print(f"{red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - folderPath = '/lakehouse/default/Files' - subFolderPath = os.path.join(folderPath, 'pqtnewfolder') - os.makedirs(subFolderPath, exist_ok=True) - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfT = list_tables(dataset, workspace) - dfE = fabric.list_expressions(dataset = dataset, workspace = workspace) - - # Check if M-partitions are used - if any(dfP['Source Type'] == 'M'): - class QueryMetadata: - def __init__(self, QueryName, QueryGroupId=None, LastKnownIsParameter=None, LastKnownResultTypeName=None, LoadEnabled=True, IsHidden=False): - self.QueryName = QueryName - self.QueryGroupId = QueryGroupId - self.LastKnownIsParameter = LastKnownIsParameter - self.LastKnownResultTypeName = LastKnownResultTypeName - self.LoadEnabled = LoadEnabled - self.IsHidden = IsHidden - - class RootObject: - def __init__(self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None): - if QueryGroups is None: - QueryGroups = [] - self.DocumentLocale = DocumentLocale - self.EngineVersion = EngineVersion - self.QueriesMetadata = QueriesMetadata - self.QueryGroups = QueryGroups - - # STEP 1: Create MashupDocument.pq - mdfileName = 'MashupDocument.pq' - mdFilePath = os.path.join(subFolderPath, mdfileName) - sb = 'section Section1;' - for table_name in dfP['Table Name'].unique(): - tName = '#\"' + table_name + '"' - sourceExpression = dfT.loc[(dfT['Name'] == table_name), 'Source Expression'].iloc[0] - refreshPolicy = dfT.loc[(dfT['Name'] == table_name), 'Refresh Policy'].iloc[0] - sourceType = dfP.loc[(dfP['Table Name'] == table_name), 'Source Type'].iloc[0] - - if sourceType == 'M' or refreshPolicy: - sb = sb + '\n' + 'shared ' + tName + ' = ' - - partitions_in_table = dfP.loc[dfP['Table Name'] == table_name, 'Partition Name'].unique() - - i=1 - for partition_name in partitions_in_table: - pSourceType = dfP.loc[(dfP['Table Name'] == table_name) & (dfP['Partition Name'] == partition_name), 'Source Type'].iloc[0] - pQuery = dfP.loc[(dfP['Table Name'] == table_name) & (dfP['Partition Name'] == partition_name), 'Query'].iloc[0] - - if pQuery is not None: - pQueryNoSpaces = pQuery.replace(' ','').replace('\n','').replace('\t','').replace('\r','') - if pQueryNoSpaces.startswith('letSource=""'): - pQuery = 'let\n\tSource = ""\nin\n\tSource' - - if pSourceType == 'M' and i==1: - sb = sb + pQuery + ';' - elif refreshPolicy and i==1: - sb = sb + sourceExpression + ';' - i+=1 - - for index, row in dfE.iterrows(): - expr = row['Expression'] - eName = row['Name'] - eName = '#"' + eName + '"' - sb = sb + '\n' + "shared " + eName + " = " + expr + ";" - - with open(mdFilePath, 'w') as file: - file.write(sb) - - # STEP 2: Create the MashupMetadata.json file - mmfileName = 'MashupMetadata.json' - mmFilePath = os.path.join(subFolderPath, mmfileName) - queryMetadata = [] - - for tName in dfP['Table Name'].unique(): - sourceType = dfP.loc[(dfP['Table Name'] == tName), 'Source Type'].iloc[0] - refreshPolicy = dfT.loc[(dfT['Name'] == tName), 'Refresh Policy'].iloc[0] - if sourceType == 'M' or refreshPolicy: - queryMetadata.append(QueryMetadata(tName, None, None, None, True, False)) - - for i, r in dfE.iterrows(): - eName = r['Name'] - eKind = r['Kind'] - if eKind == 'M': - queryMetadata.append(QueryMetadata(eName, None, None, None, True, False)) - else: - queryMetadata.append(QueryMetadata(eName, None, None, None, 
False, False)) - - rootObject = RootObject("en-US", "2.126.453.0", queryMetadata) - - def obj_to_dict(obj): - if isinstance(obj, list): - return [obj_to_dict(e) for e in obj] - elif hasattr(obj, "__dict__"): - return {k: obj_to_dict(v) for k, v in obj.__dict__.items()} - else: - return obj - jsonContent = json.dumps(obj_to_dict(rootObject), indent=4) - - with open(mmFilePath, 'w') as json_file: - json_file.write(jsonContent) - - # STEP 3: Create Metadata.json file - mFileName = 'Metadata.json' - mFilePath = os.path.join(subFolderPath, mFileName) - metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"} - jsonContent = json.dumps(metaData, indent=4) - - with open(mFilePath, 'w') as json_file: - json_file.write(jsonContent) - - # STEP 4: Create [Content_Types].xml file: - ns = 'http://schemas.openxmlformats.org/package/2006/content-types' - ET.register_namespace('', ns) - types = ET.Element("{%s}Types" % ns) - default1 = ET.SubElement(types, "{%s}Default" % ns, {"Extension": "json", "ContentType": "application/json"}) - default2 = ET.SubElement(types, "{%s}Default" % ns, {"Extension": "pq", "ContentType": "application/x-ms-m"}) - xmlDocument = ET.ElementTree(types) - xmlFileName = '[Content_Types].xml' - xmlFilePath = os.path.join(subFolderPath, xmlFileName) - xmlDocument.write(xmlFilePath, xml_declaration=True, encoding='utf-8', method='xml') - - # STEP 5: Zip up the 4 files - zipFileName = file_name + '.zip' - zipFilePath = os.path.join(folderPath, zipFileName) - shutil.make_archive(zipFilePath[:-4], 'zip', subFolderPath) - - # STEP 6: Convert the zip file back into a .pqt file - newExt = '.pqt' - directory = os.path.dirname(zipFilePath) - fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0] - newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt) - shutil.move(zipFilePath, newFilePath) - - #STEP 7: Delete subFolder directory which is no longer needed - shutil.rmtree(subFolderPath, ignore_errors=True) - - print(f"{green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse.") - - else: - print(f"{yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated.") \ No newline at end of file diff --git a/sempy_labs/DirectLakeSchemaCompare.py b/sempy_labs/DirectLakeSchemaCompare.py deleted file mode 100644 index 66e1fd0e..00000000 --- a/sempy_labs/DirectLakeSchemaCompare.py +++ /dev/null @@ -1,87 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint -from .GetLakehouseColumns import get_lakehouse_columns -from .ListFunctions import list_tables -from typing import List, Optional, Union - -def direct_lake_schema_compare(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
-    lakehouse : str, default=None
-        The Fabric lakehouse used by the Direct Lake semantic model.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    lakehouse_workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
-
-    Returns
-    -------
-
-    """
-
-    if workspace == None:
-        workspace_id = fabric.get_workspace_id()
-        workspace = fabric.resolve_workspace_name(workspace_id)
-
-    if lakehouse_workspace is None:
-        lakehouse_workspace = workspace
-
-    if lakehouse == None:
-        lakehouse_id = fabric.get_lakehouse_id()
-        lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace)
-
-    dfP = fabric.list_partitions(dataset = dataset, workspace = workspace)
-    sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace)
-    dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint')
-    dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)]
-
-    if len(dfI_filt) == 0:
-        print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace}' workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.")
-        return
-
-    if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()):
-        print(f"The '{dataset}' semantic model is not in Direct Lake mode.")
-        return
-
-    dfT = list_tables(dataset, workspace)
-    dfC = fabric.list_columns(dataset = dataset, workspace = workspace)
-    lc = get_lakehouse_columns(lakehouse, lakehouse_workspace)
-
-    dfT.rename(columns={'Type': 'Table Type'}, inplace=True)
-    dfP_filt = dfP[dfP['Mode'] == 'DirectLake']
-    dfC = pd.merge(dfC,dfP[['Table Name', 'Query']], on='Table Name', how='inner')
-    dfC = pd.merge(dfC,dfT[['Name', 'Table Type']], left_on='Table Name', right_on='Name', how='inner')
-    dfC['Full Column Name'] = format_dax_object_name(dfC['Query'], dfC['Source'])
-    dfC_filt = dfC[dfC['Table Type'] == 'Table']
-    # Schema compare
-    missingtbls = dfP_filt[~dfP_filt['Query'].isin(lc['Table Name'])]
-    missingtbls = missingtbls[['Table Name', 'Query']]
-    missingtbls.rename(columns={'Query': 'Source Table'}, inplace=True)
-    missingcols = dfC_filt[~dfC_filt['Full Column Name'].isin(lc['Full Column Name'])]
-    missingcols = missingcols[['Table Name', 'Column Name', 'Type', 'Data Type', 'Source']]
-    missingcols.rename(columns={'Source': 'Source Column'}, inplace=True)
-
-    if len(missingtbls) == 0:
-        print(f"All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.")
-    else:
-        print(f"The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.")
-        display(missingtbls)
-    if len(missingcols) == 0:
-        print(f"All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.")
-    else:
-        print(f"The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.")
-        display(missingcols)
-    
\ No newline at end of file
diff --git a/sempy_labs/DirectLakeSchemaCompare.py b/sempy_labs/GenerateReport.py
deleted file mode 100644
index d6de2531..00000000
--- a/sempy_labs/GenerateReport.py
+++ /dev/null
@@ -1,255 +0,0 @@
-import sempy
-import sempy.fabric as fabric
-import pandas as pd
-import json, base64, time
-from typing import List,
Optional, Union - -def create_report_from_reportjson(report: str, dataset: str, report_json: str, theme_json: Optional[str] = None, workspace: Optional[str] = None): - - """ - Creates a report based on a report.json file (and an optional themes.json file). - - Parameters - ---------- - report : str - Name of the report. - dataset : str - Name of the semantic model to connect to the report. - report_json : str - The report.json file to be used to create the report. - theme_json : str, default=None - The theme.json file to be used for the theme of the report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objectType = 'Report' - - dfI_m = fabric.list_items(workspace = workspace, type = 'SemanticModel') - dfI_model = dfI_m[(dfI_m['Display Name'] == dataset)] - - if len(dfI_model) == 0: - print(f"ERROR: The '{dataset}' semantic model does not exist in the '{workspace}' workspace.") - return - - datasetId = dfI_model['Id'].iloc[0] - - dfI_r = fabric.list_items(workspace = workspace, type = 'Report') - dfI_rpt = dfI_r[(dfI_r['Display Name'] == report)] - - if len(dfI_rpt) > 0: - print(f"WARNING: '{report}' already exists as a report in the '{workspace}' workspace.") - return - - client = fabric.FabricRestClient() - defPBIR = { - "version": "1.0", - "datasetReference": { - "byPath": None, - "byConnection": { - "connectionString": None, - "pbiServiceModelId": None, - "pbiModelVirtualServerName": "sobe_wowvirtualserver", - "pbiModelDatabaseName": datasetId, - "name": "EntityDataSource", - "connectionType": "pbiServiceXmlaStyleLive" - } - } -} - - def conv_b64(file): - - loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - - return f - - definitionPBIR = conv_b64(defPBIR) - payloadReportJson = conv_b64(report_json) - - if theme_json == None: - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": definitionPBIR, - "payloadType": "InlineBase64" - } - ] - - } - } - else: - payloadThemeJson = conv_b64(theme_json) - themeID = theme_json['payload']['blob']['displayName'] - themePath = 'StaticResources/SharedResources/BaseThemes/' + themeID + '.json' - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": themePath, - "payload": payloadThemeJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": definitionPBIR, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/items",json=request_body) - - if response.status_code == 201: - print('Report creation succeeded') - print(response.json()) - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = 
client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - print('Report creation succeeded') - print(response.json()) - -def update_report_from_reportjson(report: str, report_json: str, workspace: Optional[str] = None): - - """ - Updates a report based on a report.json file. - - Parameters - ---------- - report : str - Name of the report. - report_json : str - The report.json file to be used to update the report. - workspace : str, default=None - The Fabric workspace name in which the report resides. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objectType = 'Report' - - dfR = fabric.list_reports(workspace = workspace) - dfR_filt = dfR[(dfR['Name'] == report) & (dfR['Report Type'] == 'PowerBIReport')] - - if len(dfR_filt) == 0: - print(f"The '{report}' report does not exist in the '{workspace}' workspace.") - return - - reportId = dfR_filt['Id'].iloc[0] - client = fabric.FabricRestClient() - - response = client.post(f"/v1/workspaces/{workspace_id}/items/{reportId}/getDefinition") - df_items = pd.json_normalize(response.json()['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'definition.pbir'] - rptDefFile = df_items_filt['payload'].iloc[0] - #datasetId = dfR_filt['Dataset Id'].iloc[0] - #datasetWorkspaceId = dfR_filt['Dataset Workspace Id'].iloc[0] - - - #defPBIR = { - #"version": "1.0", - #"datasetReference": { - # "byPath": None, - # "byConnection": { - # "connectionString": None, - # "pbiServiceModelId": None, - # "pbiModelVirtualServerName": "sobe_wowvirtualserver", - # "pbiModelDatabaseName": datasetId, - # "name": "EntityDataSource", - # "connectionType": "pbiServiceXmlaStyleLive" - # } - #} -#} - - def conv_b64(file): - - loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - - return f - - #definitionPBIR = conv_b64(defPBIR) - payloadReportJson = conv_b64(report_json) - - request_body = { - 'displayName': report, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "report.json", - "payload": payloadReportJson, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbir", - "payload": rptDefFile, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/reports/{reportId}/updateDefinition",json=request_body) - - if response.status_code == 201: - print(f"The '{report}' report has been successfully updated.") - #print(response.json()) - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{report}' report has been successfully updated.") - #print(response.json()) \ No newline at end of file diff --git a/sempy_labs/GenerateSemanticModel.py b/sempy_labs/GenerateSemanticModel.py deleted file mode 100644 index 7ed53dae..00000000 --- 
a/sempy_labs/GenerateSemanticModel.py +++ /dev/null @@ -1,137 +0,0 @@ -import sempy -import sempy.fabric as fabric -import json, base64, time -from .GetSemanticModelBim import get_semantic_model_bim -from typing import List, Optional, Union - -def create_semantic_model_from_bim(dataset: str, bim_file: str, workspace: Optional[str] = None): - - """ - Creates a new semantic model based on a Model.bim file. - - Parameters - ---------- - dataset : str - Name of the semantic model. - bim_file : str - The model.bim file. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objectType = 'SemanticModel' - - dfI = fabric.list_items(workspace = workspace, type = objectType) - dfI_filt = dfI[(dfI['Display Name'] == dataset)] - - if len(dfI_filt) > 0: - print(f"WARNING: '{dataset}' already exists as a semantic model in the '{workspace}' workspace.") - return - - client = fabric.FabricRestClient() - defPBIDataset = { - "version": "1.0", - "settings": {} - } - - def conv_b64(file): - - loadJson = json.dumps(file) - f = base64.b64encode(loadJson.encode('utf-8')).decode('utf-8') - - return f - - payloadPBIDefinition = conv_b64(defPBIDataset) - payloadBim = conv_b64(bim_file) - - request_body = { - 'displayName': dataset, - 'type': objectType, - 'definition': { - "parts": [ - { - "path": "model.bim", - "payload": payloadBim, - "payloadType": "InlineBase64" - }, - { - "path": "definition.pbidataset", - "payload": payloadPBIDefinition, - "payloadType": "InlineBase64" - } - ] - - } - } - - response = client.post(f"/v1/workspaces/{workspace_id}/items",json=request_body) - - if response.status_code == 201: - print(f"The '{dataset}' semantic model has been created within the '{workspace}' workspace.") - print(response.json()) - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{dataset}' semantic model has been created within the '{workspace}' workspace.") - print(response.json()) - -def deploy_semantic_model(dataset: str, new_dataset: Optional[str] = None, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Deploys a semantic model based on an existing semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model to deploy. - new_dataset: str - Name of the new semantic model to be created. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str, default=None - The Fabric workspace name in which the new semantic model will be deployed. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
-
-    Returns
-    -------
-
-    """
-
-    if workspace == None:
-        workspace_id = fabric.get_workspace_id()
-        workspace = fabric.resolve_workspace_name(workspace_id)
-
-    if new_dataset_workspace == None:
-        new_dataset_workspace = workspace
-
-    if new_dataset is None:
-        new_dataset = dataset
-
-    if new_dataset == dataset and new_dataset_workspace == workspace:
-        print(f"The 'dataset' and 'new_dataset' parameters have the same value. And, the 'workspace' and 'new_dataset_workspace' parameters have the same value. At least one of these must be different. Please update the parameters.")
-        return
-
-    bim = get_semantic_model_bim(dataset = dataset, workspace = workspace)
-
-    create_semantic_model_from_bim(dataset = new_dataset, bim_file = bim, workspace = new_dataset_workspace)
\ No newline at end of file
diff --git a/sempy_labs/GetLakehouseTables.py b/sempy_labs/GetLakehouseTables.py
deleted file mode 100644
index f14f60fa..00000000
--- a/sempy_labs/GetLakehouseTables.py
+++ /dev/null
@@ -1,159 +0,0 @@
-import sempy
-import sempy.fabric as fabric
-import pandas as pd
-from pyspark.sql import SparkSession
-import pyarrow.parquet as pq
-import datetime
-from .HelperFunctions import resolve_lakehouse_id, resolve_lakehouse_name
-from .Guardrails import get_sku_size, get_directlake_guardrails_for_sku
-from .Lakehouse import lakehouse_attached
-from typing import List, Optional, Union
-
-def get_lakehouse_tables(lakehouse: Optional[str] = None, workspace: Optional[str] = None, extended: Optional[bool] = False, count_rows: Optional[bool] = False, export: Optional[bool] = False):
-
-    """
-    Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails.
-
-    Parameters
-    ----------
-    lakehouse : str, default=None
-        The Fabric lakehouse.
-        Defaults to None which resolves to the lakehouse attached to the notebook.
-    workspace : str, default=None
-        The Fabric workspace used by the lakehouse.
-        Defaults to None which resolves to the workspace of the attached lakehouse
-        or if no lakehouse attached, resolves to the workspace of the notebook.
-    extended : bool, default=False
-        Obtains additional columns relevant to the size of each table.
-    count_rows : bool, default=False
-        Obtains a row count for each lakehouse table.
-    export : bool, default=False
-        Exports the resulting dataframe to a delta table in the lakehouse.
-
-    Returns
-    -------
-    pandas.DataFrame
-        Shows the tables/columns within a lakehouse and their properties.
- """ - - df = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'Table Name', 'Format', 'Type', 'Location']) - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - - if count_rows: #Setting countrows defaults to extended=True - extended=True - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables") - - for i in response.json()['data']: - tName = i['name'] - tType = i['type'] - tFormat = i['format'] - tLocation = i['location'] - if extended == False: - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - sku_value = get_sku_size(workspace) - guardrail = get_directlake_guardrails_for_sku(sku_value) - - spark = SparkSession.builder.getOrCreate() - - intColumns = ['Files', 'Row Groups', 'Table Size'] - if tType == 'Managed' and tFormat == 'delta': - detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0] - num_files = detail_df.numFiles - size_in_bytes = detail_df.sizeInBytes - - delta_table_path = f"Tables/{tName}" - latest_files = spark.read.format('delta').load(delta_table_path).inputFiles() - file_paths = [f.split("/")[-1] for f in latest_files] - - # Handle FileNotFoundError - num_rowgroups = 0 - for filename in file_paths: - try: - num_rowgroups += pq.ParquetFile(f"/lakehouse/default/{delta_table_path}/{filename}").num_row_groups - except FileNotFoundError: - continue - - if count_rows: - num_rows = spark.table(tName).count() - intColumns.append('Row Count') - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation, 'Files': num_files, 'Row Groups': num_rowgroups, 'Row Count': num_rows, 'Table Size': size_in_bytes } - else: - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Format': tFormat, 'Type': tType, 'Location': tLocation, 'Files': num_files, 'Row Groups': num_rowgroups, 'Table Size': size_in_bytes } - - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df[intColumns] = df[intColumns].astype(int) - - df['SKU'] = guardrail['Fabric SKUs'].iloc[0] - df['Parquet File Guardrail'] = guardrail['Parquet files per table'].iloc[0] - df['Row Group Guardrail'] = guardrail['Row groups per table'].iloc[0] - df['Row Count Guardrail'] = guardrail['Rows per table (millions)'].iloc[0] * 1000000 - - df['Parquet File Guardrail Hit'] = df['Files'] > df['Parquet File Guardrail'] - df['Row Group Guardrail Hit'] = df['Row Groups'] > df['Row Group Guardrail'] - - if count_rows: - df['Row Count Guardrail Hit'] = df['Row Count'] > df['Row Count Guardrail'] - - if export: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the report.json file, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - spark = SparkSession.builder.getOrCreate() - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) - lakeTName = 'lakehouse_table_details' - lakeT_filt = df[df['Table Name'] == lakeTName] - - query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" - - if len(lakeT_filt) == 0: - runId = 1 - else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 - - export_df = df.copy() - - cols = ['Files', 'Row Groups', 'Row Count', 'Table Size', 'SKU', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail', 'Parquet File Guardrail Hit', 'Row Group Guardrail Hit', 'Row Count Guardrail Hit'] - - for c in cols: - if c not in export_df: - if c in ['Files', 'Row Groups', 'Row Count', 'Table Size', 'Parquet File Guardrail', 'Row Group Guardrail', 'Row Count Guardrail']: - export_df[c] = 0 - export_df[c] = export_df[c].astype(int) - elif c in ['SKU']: - export_df[c] = None - export_df[c] = export_df[c].astype(str) - elif c in ['Parquet File Guardrail Hit', 'Row Group Guardrail Hit', 'Row Count Guardrail Hit']: - export_df[c] = False - export_df[c] = export_df[c].astype(bool) - - print(f"Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n") - now = datetime.datetime.now() - export_df['Timestamp'] = now - export_df['RunId'] = runId - - export_df.columns = export_df.columns.str.replace(' ', '_') - spark_df = spark.createDataFrame(export_df) - spark_df.write.mode('append').format('delta').saveAsTable(lakeTName) - print(f"\u2022 Lakehouse table properties have been saved to the '{lakeTName}' delta table.") - - return df \ No newline at end of file diff --git a/sempy_labs/GetMeasureDependencies.py b/sempy_labs/GetMeasureDependencies.py deleted file mode 100644 index 6a1ba50d..00000000 --- a/sempy_labs/GetMeasureDependencies.py +++ /dev/null @@ -1,149 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import format_dax_object_name -from typing import List, Optional, Union - -def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): - - """ - Shows all dependencies for all measures in a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - Shows all dependencies for all measures in the semantic model. 
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = - """ - SELECT - [TABLE] AS [Table Name] - ,[OBJECT] AS [Object Name] - ,[OBJECT_TYPE] AS [Object Type] - ,[REFERENCED_TABLE] AS [Referenced Table] - ,[REFERENCED_OBJECT] AS [Referenced Object] - ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] - FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY - WHERE [OBJECT_TYPE] = 'MEASURE' - """) - - dep['Object Type'] = dep['Object Type'].str.capitalize() - dep['Referenced Object Type'] = dep['Referenced Object Type'].str.capitalize() - - dep['Full Object Name'] = format_dax_object_name(dep['Table Name'], dep['Object Name']) - dep['Referenced Full Object Name'] = format_dax_object_name(dep['Referenced Table'], dep['Referenced Object']) - dep['Parent Node'] = dep['Object Name'] - - df = dep - - df['Done'] = df.apply(lambda row: False if row['Referenced Object Type'] == 'Measure' else True, axis=1) - - while(any(df['Done'] == False)): - for i, r in df.iterrows(): - rObjFull = r['Referenced Full Object Name'] - rObj = r['Referenced Object'] - if r['Done'] == False: - dep_filt = dep[dep['Full Object Name'] == rObjFull] - - for index, dependency in dep_filt.iterrows(): - d = True - if dependency[5] == 'Measure': - d = False - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[4], 'Referenced Table': dependency[3], 'Referenced Object Type': dependency[5], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) - else: - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[5], 'Referenced Table': dependency[4], 'Referenced Object Type': dependency[6], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) - - df.loc[i, 'Done'] = True - - df = df.drop(['Done','Full Object Name','Referenced Full Object Name'], axis=1) - - return df - -def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): - - """ - Shows all dependencies for all objects in a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - Shows all dependencies for all objects in the semantic model. 
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dep = fabric.evaluate_dax(dataset = dataset, workspace = workspace, dax_string = - """ - SELECT - [TABLE] AS [Table Name] - ,[OBJECT] AS [Object Name] - ,[OBJECT_TYPE] AS [Object Type] - ,[EXPRESSION] AS [Expression] - ,[REFERENCED_TABLE] AS [Referenced Table] - ,[REFERENCED_OBJECT] AS [Referenced Object] - ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] - FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY - """) - - dep['Object Type'] = dep['Object Type'].str.replace('_',' ').str.title() - dep['Referenced Object Type'] = dep['Referenced Object Type'].str.replace('_',' ').str.title() - - dep['Full Object Name'] = format_dax_object_name(dep['Table Name'], dep['Object Name']) - dep['Referenced Full Object Name'] = format_dax_object_name(dep['Referenced Table'], dep['Referenced Object']) - dep['Parent Node'] = dep['Object Name'] - - df = dep - - objs = ['Measure','Calc Column', 'Calculation Item', 'Calc Table'] - - df['Done'] = df.apply(lambda row: False if row['Referenced Object Type'] in objs else True, axis=1) - - while(any(df['Done'] == False)): - for i, r in df.iterrows(): - rObjFull = r['Referenced Full Object Name'] - rObj = r['Referenced Object'] - if r['Done'] == False: - dep_filt = dep[dep['Full Object Name'] == rObjFull] - - for index, dependency in dep_filt.iterrows(): - d = True - if dependency[5] in objs: - d = False - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[4], 'Referenced Table': dependency[3], 'Referenced Object Type': dependency[5], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) - else: - df = pd.concat([df, pd.DataFrame([{'Table Name': r['Table Name'], 'Object Name': r['Object Name'], 'Object Type': r['Object Type'] - , 'Referenced Object': dependency[5], 'Referenced Table': dependency[4], 'Referenced Object Type': dependency[6], 'Done': d, 'Full Object Name': r['Full Object Name'], 'Referenced Full Object Name': dependency[7],'Parent Node': rObj }])], ignore_index=True) - - df.loc[i, 'Done'] = True - - df = df.drop(['Done'], axis=1) - - return df \ No newline at end of file diff --git a/sempy_labs/GetSemanticModelBim.py b/sempy_labs/GetSemanticModelBim.py deleted file mode 100644 index 0959dadb..00000000 --- a/sempy_labs/GetSemanticModelBim.py +++ /dev/null @@ -1,79 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, os, time, base64 -from .HelperFunctions import resolve_lakehouse_name -from .Lakehouse import lakehouse_attached -from typing import List, Optional, Union - -def get_semantic_model_bim(dataset: str, workspace: Optional[str] = None, save_to_file_name: Optional[str] = None): - - """ - Extracts the Model.bim file for a given semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - save_to_file_name : str, default=None - If specified, saves the Model.bim as a file in the lakehouse attached to the notebook. - - Returns - ------- - str - The Model.bim file for the semantic model. 
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - objType = 'SemanticModel' - client = fabric.FabricRestClient() - itemList = fabric.list_items(workspace = workspace, type = objType) - itemListFilt = itemList[(itemList['Display Name'] == dataset)] - itemId = itemListFilt['Id'].iloc[0] - response = client.post(f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition") - - if response.status_code == 200: - res = response.json() - elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': - time.sleep(3) - response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - response = client.get(f"/v1/operations/{operationId}/result") - res = response.json() - df_items = pd.json_normalize(res['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'model.bim'] - payload = df_items_filt['payload'].iloc[0] - bimFile = base64.b64decode(payload).decode('utf-8') - bimJson = json.loads(bimFile) - - if save_to_file_name is not None: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") - return - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - folderPath = '/lakehouse/default/Files' - fileExt = '.bim' - if not save_to_file_name.endswith(fileExt): - save_to_file_name = save_to_file_name + fileExt - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: - json.dump(bimJson, json_file, indent=4) - print(f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n") - - return bimJson \ No newline at end of file diff --git a/sempy_labs/GetSharedExpression.py b/sempy_labs/GetSharedExpression.py deleted file mode 100644 index 032cf288..00000000 --- a/sempy_labs/GetSharedExpression.py +++ /dev/null @@ -1,50 +0,0 @@ -import sempy -import sempy.fabric as fabric -from .HelperFunctions import resolve_lakehouse_name -from .ListFunctions import list_lakehouses -from typing import List, Optional, Union - -def get_shared_expression(lakehouse: Optional[str] = None, workspace: Optional[str] = None): - - """ - Dynamically generates the M expression used by a Direct Lake model for a given lakehouse. - - Parameters - ---------- - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - str - Shows the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint. 
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id) - - dfL = list_lakehouses(workspace = workspace) - lakeDetail = dfL[dfL['Lakehouse Name'] == lakehouse] - - sqlEPCS = lakeDetail['SQL Endpoint Connection String'].iloc[0] - sqlepid = lakeDetail['SQL Endpoint ID'].iloc[0] - provStatus = lakeDetail['SQL Endpoint Provisioning Status'].iloc[0] - - if provStatus == 'InProgress': - print(f"The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned.") - return - - sh = 'let\n\tdatabase = Sql.Database("' + sqlEPCS + '", "' + sqlepid + '")\nin\n\tdatabase' - - return sh \ No newline at end of file diff --git a/sempy_labs/LogAnalytics.py b/sempy_labs/LogAnalytics.py deleted file mode 100644 index 8b4cacad..00000000 --- a/sempy_labs/LogAnalytics.py +++ /dev/null @@ -1,68 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import resolve_dataset_id -from typing import List, Optional, Union -from sempy._utils._log import log - -@log -def run_dax(dataset: str, dax_query: str, user_name: Optional[str] = None, workspace: Optional[str] = None): - - """ - Runs a DAX query against a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - dax_query : str - The DAX query. - user_name : str | None - The user name (i.e. hello@goodbye.com). - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe holding the result of the DAX query. 
- """ - - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries-in-group - - if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - dataset_id = resolve_dataset_id(dataset = dataset, workspace = workspace) - - if user_name is None: - request_body = { - "queries": [ - { - "query": dax_query - } - ] - } - else: - request_body = { - "queries": [ - { - "query": dax_query - } - ], - "impersonatedUserName": user_name - } - - client = fabric.PowerBIRestClient() - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries", json = request_body) - data = response.json()['results'][0]['tables'] - column_names = data[0]['rows'][0].keys() - data_rows = [row.values() for item in data for row in item['rows']] - df = pd.DataFrame(data_rows, columns=column_names) - - return df \ No newline at end of file diff --git a/sempy_labs/MeasureDependencyTree.py b/sempy_labs/MeasureDependencyTree.py deleted file mode 100644 index 17a3b649..00000000 --- a/sempy_labs/MeasureDependencyTree.py +++ /dev/null @@ -1,84 +0,0 @@ -import sempy -import sempy.fabric as fabric -from anytree import Node, RenderTree -from .GetMeasureDependencies import get_measure_dependencies -from typing import List, Optional, Union -from sempy._utils._log import log - -@log -def measure_dependency_tree(dataset: str, measure_name: str, workspace: Optional[str] = None): - - """ - Prints a measure dependency tree of all dependent objects for a measure in a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - measure_name : str - Name of the measure. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfM_filt = dfM[dfM['Measure Name'] == measure_name] - - if len(dfM_filt) == 0: - print(f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace.") - return - - md = get_measure_dependencies(dataset, workspace) - df_filt = md[md['Object Name'] == measure_name] - - # Create a dictionary to hold references to nodes - node_dict = {} - measureIcon = "\u2211" - tableIcon = "\u229E" - columnIcon = "\u229F" - - # Populate the tree - for _, row in df_filt.iterrows(): - #measure_name = row['Object Name'] - ref_obj_table_name = row['Referenced Table'] - ref_obj_name = row['Referenced Object'] - ref_obj_type = row['Referenced Object Type'] - parent_node_name = row['Parent Node'] - - # Create or get the parent node - parent_node = node_dict.get(parent_node_name) - if parent_node is None: - parent_node = Node(parent_node_name) - node_dict[parent_node_name] = parent_node - parent_node.custom_property = measureIcon + " " - - # Create the child node - child_node_name = ref_obj_name - child_node = Node(child_node_name, parent=parent_node) - if ref_obj_type == 'Column': - child_node.custom_property = columnIcon + " '" + ref_obj_table_name + "'" - elif ref_obj_type == 'Table': - child_node.custom_property = tableIcon + " " - elif ref_obj_type == 'Measure': - child_node.custom_property = measureIcon + " " - - # Update the dictionary with the child node - node_dict[child_node_name] = child_node - - # Visualize the tree structure using RenderTree - for pre, _, node in RenderTree(node_dict[measure_name]): - if tableIcon in node.custom_property: - print(f"{pre}{node.custom_property}'{node.name}'") - else: - print(f"{pre}{node.custom_property}[{node.name}]") \ No newline at end of file diff --git a/sempy_labs/MigrateCalcTablesToLakehouse.py b/sempy_labs/MigrateCalcTablesToLakehouse.py deleted file mode 100644 index 67317272..00000000 --- a/sempy_labs/MigrateCalcTablesToLakehouse.py +++ /dev/null @@ -1,311 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import re, datetime, time -from .GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import resolve_lakehouse_name, resolve_lakehouse_id, create_abfss_path -from .TOM import connect_semantic_model -from pyspark.sql import SparkSession -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_calc_tables_to_lakehouse(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. 
- Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) - else: - lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace) - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - #dfC['Column Object'] = "'" + dfC['Table Name'] + "'[" + dfC['Column Name'] + "]" - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[~dfP_filt['Query'].str.contains('NAMEOF')] #Remove field parameters - #dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] - lakeTables = get_lakehouse_tables(lakehouse, lakehouse_workspace) - - # Do not execute the function if lakehouse tables already exist with the same name - killFunction = False - for i, r in dfP_filt.iterrows(): - tName = r['Table Name'] - dtName = tName.replace(' ', '_') - - if dtName in lakeTables['Table Name'].values: - print(f"{red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse}' lakehouse in the '{workspace}' workspace.") - killFunction = True - - if killFunction: - return - - spark = SparkSession.builder.getOrCreate() - - if len(dfP_filt) == 0: - print(f"{yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=dataset, workspace = workspace, readonly=True) as tom: - success = True - for t in tom.model.Tables: - if tom.is_auto_date_table(table_name = t.Name): - print(f"{yellow_dot} The '{t.Name}' table is an auto-datetime table and is not supported in the Direct Lake migration process. 
Please create a proper Date/Calendar table in your lakehoues and use it in your Direct Lake model.") - else: - for p in t.Partitions: - if str(p.SourceType) == 'Calculated': - query = p.Source.Expression - if 'NAMEOF' not in query: # exclude field parameters - daxQuery = '' - if query.lower().startswith('calendar') and any(str(c.Type) == 'Calculated' for c in t.Columns): - daxQuery = f"ADDCOLUMNS(\n{query}," - for c in t.Columns: - if str(c.Type) == 'Calculated': - expr = c.Expression - expr = expr.replace(f"'{t.Name}'",'').replace(f"{t.Name}[Date]",'[Date]') - expr = expr.replace('[MonthNo]','MONTH([Date])').replace('[QuarterNo]','INT((MONTH([Date]) + 2) / 3)') - daxQuery = f"{daxQuery}\n\"{c.Name}\",{expr}," - daxQuery = 'EVALUATE\n' + daxQuery.rstrip(',') + '\n)' - else: - daxQuery = f"EVALUATE\n{query}" - daxQueryTopN = daxQuery.replace('EVALUATE\n', 'EVALUATE\nTOPN(1,') + ')' - - try: - df = fabric.evaluate_dax(dataset = dataset, dax_string = daxQueryTopN, workspace = workspace) - - for col in df.columns: - pattern = r"\[([^\]]+)\]" - - matches = re.findall(pattern, col) - new_column_name = matches[0].replace(' ','') - - df.rename(columns={col: new_column_name}, inplace=True) - - try: - dataType = next(str(c.DataType) for c in tom.model.Tables[t.Name].Columns if str(c.Type) == 'CalculatedTableColumn' and c.SourceColumn == col) - except: - dataType = next(str(c.DataType) for c in tom.model.Tables[t.Name].Columns if str(c.Type) == 'Calculated' and c.Name == new_column_name) - - if dataType == 'Int64': - df[new_column_name] = df[new_column_name].astype(int) - elif dataType in ['Decimal', 'Double']: - df[new_column_name] = df[new_column_name].astype(float) - elif dataType == 'Boolean': - df[new_column_name] = df[new_column_name].astype(bool) - elif dataType == 'DateTime': - df[new_column_name] = pd.to_datetime(df[new_column_name]) - - delta_table_name = t.Name.replace(' ','_').lower() - - spark_df = spark.createDataFrame(df) - filePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = lakehouse_workspace_id, delta_table_name = delta_table_name) - spark_df.write.mode('overwrite').format('delta').save(filePath) - - start_time2 = datetime.datetime.now() - timeout2 = datetime.timedelta(minutes=1) - success2 = False - - while not success2: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom2: - success2 = True - tom2.set_annotation(object = tom2.model, name = t.Name, value = daxQuery) - except Exception as e: - if datetime.datetime.now() - start_time2 > timeout2: - break - time.sleep(1) - - print(f"{green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") - except: - print(f"{red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) - -@log -def migrate_field_parameters(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Migrates field parameters from one semantic model to another. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. 
- Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - from .HelperFunctions import format_dax_object_name - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[dfP_filt['Query'].str.contains('NAMEOF')] # Only field parameters - dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] - - if len(dfP_filt) == 0: - print(f"{green_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no field parameters.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, workspace=new_dataset_workspace, readonly=False) as tom: - success = True - - for i,r in dfP_filt.iterrows(): - tName = r['Table Name'] - query = r['Query'] - - # For field parameters, remove calc columns from the query - rows = query.strip().split('\n') - filtered_rows = [row for row in rows if not any(value in row for value in dfC_CalcColumn['Column Object'].values)] - updated_query_string = '\n'.join(filtered_rows) - - # Remove extra comma - lines = updated_query_string.strip().split('\n') - lines[-2] = lines[-2].rstrip(',') - expr = '\n'.join(lines) - - try: - par = TOM.Partition() - par.Name = tName - - parSource = TOM.CalculatedPartitionSource() - par.Source = parSource - parSource.Expression = expr - - tbl = TOM.Table() - tbl.Name = tName - tbl.Partitions.Add(par) - - columns = ['Value1', 'Value2', 'Value3'] - - for colName in columns: - col = TOM.CalculatedTableColumn() - col.Name = colName - col.SourceColumn = '[' + colName + ']' - col.DataType = TOM.DataType.String - - tbl.Columns.Add(col) - - tom.model.Tables.Add(tbl) - - ep = TOM.JsonExtendedProperty() - ep.Name = 'ParameterMetadata' - ep.Value = '{"version":3,"kind":2}' - - rcd = TOM.RelatedColumnDetails() - gpc = TOM.GroupByColumn() - gpc.GroupingColumn = tom.model.Tables[tName].Columns['Value2'] - rcd.GroupByColumns.Add(gpc) - - # Update column properties - tom.model.Tables[tName].Columns['Value2'].IsHidden = True - tom.model.Tables[tName].Columns['Value3'].IsHidden = True - tom.model.Tables[tName].Columns['Value3'].DataType = TOM.DataType.Int64 - tom.model.Tables[tName].Columns['Value1'].SortByColumn = tom.model.Tables[tName].Columns['Value3'] - tom.model.Tables[tName].Columns['Value2'].SortByColumn = tom.model.Tables[tName].Columns['Value3'] - tom.model.Tables[tName].Columns['Value2'].ExtendedProperties.Add(ep) - tom.model.Tables[tName].Columns['Value1'].RelatedColumnDetails = rcd - - dfC_filt1 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value1]')] - col1 = 
dfC_filt1['Column Name'].iloc[0] - dfC_filt2 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value2]')] - col2 = dfC_filt2['Column Name'].iloc[0] - dfC_filt3 = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == '[Value3]')] - col3 = dfC_filt3['Column Name'].iloc[0] - - tom.model.Tables[tName].Columns['Value1'].Name = col1 - tom.model.Tables[tName].Columns['Value2'].Name = col2 - tom.model.Tables[tName].Columns['Value3'].Name = col3 - - print(f"{green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace}' workspace.") - except: - print(f"{red_dot} The '{tName}' table has not been added as a field parameter.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateCalcTablesToSemanticModel.py b/sempy_labs/MigrateCalcTablesToSemanticModel.py deleted file mode 100644 index 6b7e04cb..00000000 --- a/sempy_labs/MigrateCalcTablesToSemanticModel.py +++ /dev/null @@ -1,123 +0,0 @@ -import sempy -import sempy.fabric as fabric -import re, datetime, time -from .GetLakehouseTables import get_lakehouse_tables -from .HelperFunctions import resolve_lakehouse_name -from .TOM import connect_semantic_model -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_calc_tables_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None ): - - """ - Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Get calc tables but not field parameters - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[(dfP['Source Type'] == 'Calculated')] - dfP_filt = dfP_filt[~dfP_filt['Query'].str.contains('NAMEOF')] - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - lc = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) - # Get all calc table columns of calc tables not including field parameters - dfC_filt = dfC[(dfC['Table Name'].isin(dfP_filt['Table Name']))]# & (dfC['Type'] == 'CalculatedTableColumn')] - #dfA = list_annotations(new_dataset, new_dataset_workspace) - #dfA_filt = dfA[(dfA['Object Type'] == 'Model') & ~ (dfA['Annotation Value'].str.contains('NAMEOF'))] - - if len(dfP_filt) == 0: - print(f"{green_dot} The '{dataset}' semantic model has no calculated tables.") - return - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - for tName in dfC_filt['Table Name'].unique(): - if tName.lower() in lc['Table Name'].values: - - try: - tom.model.Tables[tName] - except: - tom.add_table(name = tName) - tom.add_entity_partition(table_name=tName, entity_name=tName.replace(' ','_').lower()) - - columns_in_table = dfC_filt.loc[dfC_filt['Table Name'] == tName, 'Column Name'].unique() - - for cName in columns_in_table: - scName = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Source'].iloc[0] - cDataType = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Data Type'].iloc[0] - cType = dfC.loc[(dfC['Table Name'] == tName) & (dfC['Column Name'] == cName), 'Type'].iloc[0] - - #av = tom.get_annotation_value(object = tom.model, name = tName) - - #if cType == 'CalculatedTableColumn': - #lakeColumn = scName.replace(' ','_') - #elif cType == 'Calculated': - pattern = r'\[([^]]+)\]' - - matches = re.findall(pattern, scName) - lakeColumn = matches[0].replace(' ','') - try: - tom.model.Tables[tName].Columns[cName] - except: - tom.add_data_column(table_name = tName, column_name=cName, source_column=lakeColumn, data_type=cDataType) - print(f"{green_dot} The '{tName}'[{cName}] column has been added.") - - print(f"\n{green_dot} All viable calculated tables have been added to the model.") - - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateModelObjectsToSemanticModel.py b/sempy_labs/MigrateModelObjectsToSemanticModel.py deleted file mode 100644 index aa984255..00000000 --- a/sempy_labs/MigrateModelObjectsToSemanticModel.py +++ /dev/null @@ -1,324 +0,0 @@ -import sempy -import sempy.fabric as fabric -import re, datetime, time -from .ListFunctions import list_tables -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model -from typing import List, 
Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_model_objects_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - import System - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspaceId = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - dfT = list_tables(dataset, workspace) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfM = fabric.list_measures(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - dfRole = fabric.get_roles(dataset = dataset, workspace = workspace) - dfRLS = fabric.get_row_level_security_permissions(dataset = dataset, workspace = workspace) - dfCI = fabric.list_calculation_items(dataset = dataset, workspace = workspace) - dfP = fabric.list_perspectives(dataset = dataset, workspace = workspace) - dfTranslation = fabric.list_translations(dataset = dataset, workspace = workspace) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfPar = fabric.list_partitions(dataset = dataset, workspace = workspace) - - dfP_cc = dfPar[(dfPar['Source Type'] == 'Calculated')] - dfP_fp = dfP_cc[dfP_cc['Query'].str.contains('NAMEOF')] - dfC_fp = dfC[dfC['Table Name'].isin(dfP_fp['Table Name'].values)] - - print(f"{in_progress} Updating '{new_dataset}' based on '{dataset}'...") - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - - isDirectLake = any(str(p.Mode) == 'DirectLake' for t in tom.model.Tables for p in t.Partitions) - - print(f"\n{in_progress} Updating table properties...") - for t in tom.model.Tables: - t.IsHidden = bool(dfT.loc[dfT['Name'] == t.Name, 'Hidden'].iloc[0]) - t.Description = dfT.loc[dfT['Name'] == t.Name, 'Description'].iloc[0] - t.DataCategory = dfT.loc[dfT['Name'] == t.Name, 'Data Category'].iloc[0] - - print(f"{green_dot} The '{t.Name}' table's properties have been updated.") - - print(f"\n{in_progress} Updating column properties...") - for t in tom.model.Tables: - if t.Name not in dfP_fp['Table Name'].values: # do not 
include field parameters - dfT_filtered = dfT[dfT['Name'] == t.Name] - tType = dfT_filtered['Type'].iloc[0] - for c in t.Columns: - if not c.Name.startswith('RowNumber-'): - dfC_filt = dfC[(dfC['Table Name'] == t.Name) & (dfC['Column Name'] == c.Name)] - cName = dfC_filt['Column Name'].iloc[0] - c.Name = cName - if tType == 'Table': - c.SourceColumn = cName.replace(' ', '_') - c.IsHidden = bool(dfC_filt['Hidden'].iloc[0]) - c.DataType = System.Enum.Parse(TOM.DataType, dfC_filt['Data Type'].iloc[0]) - c.DisplayFolder = dfC_filt['Display Folder'].iloc[0] - c.FormatString = dfC_filt['Format String'].iloc[0] - c.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, dfC_filt['Summarize By'].iloc[0]) - c.DataCategory = dfC_filt['Data Category'].iloc[0] - c.IsKey = bool(dfC_filt['Key'].iloc[0]) - sbc = dfC_filt['Sort By Column'].iloc[0] - - if sbc != None: - try: - c.SortByColumn = tom.model.Tables[t.Name].Columns[sbc] - except: - print(f"{red_dot} Failed to create '{sbc}' as a Sort By Column for the '{c.Name}' in the '{t.Name}' table.") - print(f"{green_dot} The '{t.Name}'[{c.Name}] column's properties have been updated.") - - print(f"\n{in_progress} Creating hierarchies...") - dfH_grouped = dfH.groupby(['Table Name', 'Hierarchy Name', 'Hierarchy Hidden', 'Hierarchy Description']).agg({'Level Name': list, 'Column Name': list}).reset_index() - - for i, r in dfH_grouped.iterrows(): - tName = r['Table Name'] - hName = r['Hierarchy Name'] - hDesc = r['Hierarchy Description'] - hHid = bool(r['Hierarchy Hidden']) - cols = r['Column Name'] - lvls = r['Level Name'] - - try: - tom.model.Tables[tName].Hierarchies[hName] - except: - tom.add_hierarchy(table_name = tName, hierarchy_name=hName, hierarchy_description=hDesc, hierarchy_hidden=hHid, columns=cols, levels=lvls) - print(f"{green_dot} The '{hName}' hierarchy has been added.") - - print(f"\n{in_progress} Creating measures...") - for i, r in dfM.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - mExpr = r['Measure Expression'] - mHidden = bool(r['Measure Hidden']) - mDF = r['Measure Display Folder'] - mDesc = r['Measure Description'] - mFS = r['Format String'] - - try: - tom.model.Tables[tName].Measures[mName] - except: - tom.add_measure(table_name = tName, measure_name=mName, expression=mExpr, hidden=mHidden, display_folder=mDF, description=mDesc, format_string=mFS) - print(f"{green_dot} The '{mName}' measure has been added.") - - for cgName in dfCI['Calculation Group Name'].unique(): - - isHidden = bool(dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Hidden'].iloc[0]) - prec = int(dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Precedence'].iloc[0]) - desc = dfCI.loc[(dfCI['Calculation Group Name'] == cgName), 'Description'].iloc[0] - - try: - tom.model.Tables[cgName] - except: - tom.add_calculation_group(name = cgName, description = desc, precedence=prec, hidden=isHidden) - print(f"{green_dot} The '{cgName}' calculation group has been added.") - tom.model.DiscourageImplicitMeasures = True - - print(f"\n{in_progress} Updating calculation group column name...") - dfC_filt = dfC[(dfC['Table Name'] == cgName) & (dfC['Hidden'] == False)] - colName = dfC_filt['Column Name'].iloc[0] - tom.model.Tables[cgName].Columns['Name'].Name = colName - - calcItems = dfCI.loc[dfCI['Calculation Group Name'] == cgName, 'Calculation Item Name'].unique() - - print(f"\n{in_progress} Creating calculation items...") - for calcItem in calcItems: - ordinal = int(dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] 
== calcItem), 'Ordinal'].iloc[0]) - expr = dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] == calcItem), 'Expression'].iloc[0] - fse = dfCI.loc[(dfCI['Calculation Group Name'] == cgName) & (dfCI['Calculation Item Name'] == calcItem), 'Format String Expression'].iloc[0] - try: - tom.model.Tables[cgName].CalculationGroup.CalculationItems[calcItem] - except: - tom.add_calculation_item(table_name = cgName, calculation_item_name=calcItem, expression=expr, format_string_expression=fse, ordinal=ordinal) - print(f"{green_dot} The '{calcItem}' has been added to the '{cgName}' calculation group.") - - print(f"\n{in_progress} Creating relationships...") - for index, row in dfR.iterrows(): - fromTable = row['From Table'] - fromColumn = row['From Column'] - toTable = row['To Table'] - toColumn = row['To Column'] - isActive = row['Active'] - cfb = row['Cross Filtering Behavior'] - sfb = row['Security Filtering Behavior'] - rori = row['Rely On Referential Integrity'] - mult = row['Multiplicity'] - - card_mapping = {'m': 'Many', '1': 'One', '0': 'None'} - - fromCard = card_mapping.get(mult[0]) - toCard = card_mapping.get(mult[-1]) - - relName = create_relationship_name(fromTable,fromColumn,toTable,toColumn) - - if any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} already exists as a relationship in the semantic model.") - elif isDirectLake and any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn and (r.FromColumn.DataType == 'DateTime' or r.ToColumn.DataType == 'DateTime') for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} was not created since relationships based on DateTime columns are not supported.") - elif isDirectLake and any(r.FromTable.Name == fromTable and r.FromColumn.Name == fromColumn and r.ToTable.Name == toTable and r.ToColumn.Name == toColumn and (r.FromColumn.DataType != r.ToColumn.DataType) for r in tom.model.Relationships): - print(f"{yellow_dot} {relName} was not created since columns used in a relationship must have the same data type.") - else: - try: - tom.add_relationship( - from_table = fromTable, from_column=fromColumn, - to_table=toTable, to_column=toColumn, - from_cardinality=fromCard,to_cardinality=toCard, - cross_filtering_behavior=cfb, - security_filtering_behavior=sfb, - rely_on_referential_integrity=rori, - is_active=isActive) - - print(f"{green_dot} The {relName} relationship has been added.") - except: - print(f"{red_dot} The {relName} relationship was not added.") - - print(f"\n{in_progress} Creating roles...") - for index, row in dfRole.iterrows(): - roleName = row['Role'] - roleDesc = row['Description'] - modPerm = row['Model Permission'] - - try: - tom.model.Roles[roleName] - except: - tom.add_role(role_name=roleName, model_permission=modPerm, description=roleDesc) - print(f"{green_dot} The '{roleName}' role has been added.") - - print(f"\n{in_progress} Creating row level security...") - for index, row in dfRLS.iterrows(): - roleName = row['Role'] - tName = row['Table'] - expr = row['Filter Expression'] - - try: - tom.set_rls(role_name=roleName, table_name=tName, filter_expression=expr) - print(f"{green_dot} Row level security for the '{tName}' table within the '{roleName}' role has been set.") - except: - print(f"{red_dot} Row level security for the '{tName}' table within the '{roleName}' 
role was not set.") - - print(f"\n{in_progress} Creating perspectives...") - for pName in dfP['Perspective Name'].unique(): - - try: - tom.model.Perspectives[pName] - except: - tom.add_perspective(perspective_name=pName) - print(f"{green_dot} The '{pName}' perspective has been added.") - - print(f"\n{in_progress} Adding objects to perspectives...") - for index, row in dfP.iterrows(): - pName = row['Perspective Name'] - tName = row['Table Name'] - oName = row['Object Name'] - oType = row['Object Type'] - tType = dfT.loc[(dfT['Name'] == tName), 'Type'].iloc[0] - - try: - if oType == 'Table': - tom.add_to_perspective(object = tom.model.Tables[tName], perspective_name=pName) - elif oType == 'Column': - tom.add_to_perspective(object = tom.model.Tables[tName].Columns[oName], perspective_name=pName) - elif oType == 'Measure': - tom.add_to_perspective(object = tom.model.Tables[tName].Measures[oName], perspective_name=pName) - elif oType == 'Hierarchy': - tom.add_to_perspective(object = tom.model.Tables[tName].Hierarchies[oName], perspective_name=pName) - except: - pass - - print(f"\n{in_progress} Creating translation languages...") - for trName in dfTranslation['Culture Name'].unique(): - try: - tom.model.Cultures[trName] - except: - tom.add_translation(trName) - print(f"{green_dot} The '{trName}' translation language has been added.") - - print(f"\n{in_progress} Creating translation values...") - for index, row in dfTranslation.iterrows(): - trName = row['Culture Name'] - tName = row['Table Name'] - oName = row['Object Name'] - oType = row['Object Type'] - translation = row['Translation'] - prop = row['Property'] - - if prop == 'Caption': - prop = 'Name' - elif prop == 'DisplayFolder': - prop = 'Display Folder' - - try: - if oType == 'Table': - tom.set_translation(object = tom.model.Tables[tName], language=trName, property = prop, value = translation) - elif oType == 'Column': - tom.set_translation(object = tom.model.Tables[tName].Columns[oName], language=trName, property = prop, value = translation) - elif oType == 'Measure': - tom.set_translation(object = tom.model.Tables[tName].Measures[oName], language=trName, property = prop, value = translation) - elif oType == 'Hierarchy': - tom.set_translation(object = tom.model.Tables[tName].Hierarchies[oName], language=trName, property = prop, value = translation) - elif oType == 'Level': - - pattern = r'\[([^]]+)\]' - matches = re.findall(pattern, oName) - lName = matches[0] - - pattern = r"'([^']+)'" - matches = re.findall(pattern, oName) - hName = matches[0] - tom.set_translation(object = tom.model.Tables[tName].Hierarchies[hName].Levels[lName], language=trName, property = prop, value = translation) - except: - pass - - print(f"\n{green_dot} Migration of objects from '{dataset}' -> '{new_dataset}' is complete.") - - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/MigrateTablesColumnsToSemanticModel.py b/sempy_labs/MigrateTablesColumnsToSemanticModel.py deleted file mode 100644 index 47f5d054..00000000 --- a/sempy_labs/MigrateTablesColumnsToSemanticModel.py +++ /dev/null @@ -1,135 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import datetime, time -from .ListFunctions import list_tables -from .GetSharedExpression import get_shared_expression -from .HelperFunctions import resolve_lakehouse_name -from .Lakehouse import lakehouse_attached -from .TOM import connect_semantic_model -from typing import List, Optional, Union 
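The retry-with-timeout loop that closes migrate_model_objects_to_semantic_model above (and reappears in the table/column migration below) is easy to lift out on its own. A minimal standalone sketch, assuming a caller-supplied zero-argument `action` callable (a hypothetical name, not part of the library):

```python
import datetime
import time


def retry_until_timeout(action, timeout_minutes=1, poll_seconds=1):
    # Keep calling `action` until it succeeds or the timeout elapses.
    # `action` is a hypothetical zero-argument callable, e.g. a function that
    # opens the new semantic model for write and applies the changes.
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(minutes=timeout_minutes)
    while True:
        try:
            return action()  # success: return whatever the action produced
        except Exception:
            if datetime.datetime.now() - start_time > timeout:
                raise  # give up and surface the last error
            time.sleep(poll_seconds)  # wait briefly, then try again
```

Unlike the original loop, which breaks silently once the one-minute window elapses, this sketch re-raises the last error so the caller can see why the write never succeeded.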
-from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def migrate_tables_columns_to_semantic_model(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if new_dataset_workspace == None: - new_dataset_workspace = workspace - - if lakehouse_workspace == None: - lakehouse_workspace = new_dataset_workspace - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Check that lakehouse is attached to the notebook - lakeAttach = lakehouse_attached() - - # Run if lakehouse is attached to the notebook or a lakehouse & lakehouse workspace are specified - if lakeAttach or (lakehouse is not None and lakehouse_workspace is not None): - shEx = get_shared_expression(lakehouse, lakehouse_workspace) - - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfT = list_tables(dataset, workspace) - dfT.rename(columns={'Type': 'Table Type'}, inplace=True) - dfC = pd.merge(dfC, dfT[['Name', 'Table Type']], left_on = 'Table Name', right_on = 'Name', how='left') - dfT_filt = dfT[dfT['Table Type'] == 'Table'] - dfC_filt = dfC[(dfC['Table Type'] == 'Table') & ~(dfC['Column Name'].str.startswith('RowNumber-')) & (dfC['Type'] != 'Calculated')] - - print(f"{in_progress} Updating '{new_dataset}' based on '{dataset}'...") - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=new_dataset, readonly=False, workspace=new_dataset_workspace) as tom: - success = True - try: - tom.model.Expressions['DatabaseQuery'] - except: - tom.add_expression('DatabaseQuery', expression = shEx) - print(f"{green_dot} The 'DatabaseQuery' expression has been added.") - - for i, r in dfT_filt.iterrows(): - tName = r['Name'] - tDC = r['Data Category'] - tHid = bool(r['Hidden']) - tDesc = r['Description'] 
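The merge-and-filter step above (joining the fabric.list_columns output to the list_tables output, then dropping RowNumber system columns and calculated columns) can be exercised in isolation. A small sketch using invented toy frames that stand in for the real metadata:

```python
import pandas as pd

# Toy stand-ins for the list_tables / fabric.list_columns results used above.
dfT = pd.DataFrame({
    'Name': ['Sales', 'Calendar', 'Sales Calc'],
    'Table Type': ['Table', 'Table', 'Calculated Table'],
})
dfC = pd.DataFrame({
    'Table Name': ['Sales', 'Sales', 'Sales', 'Sales Calc'],
    'Column Name': ['RowNumber-2662979B', 'Sales Amount', 'Margin', 'Calc Col'],
    'Type': ['RowNumber', 'Data', 'Calculated', 'CalculatedTableColumn'],
})

# Bring the table type onto every column row, then keep only regular data
# columns: no RowNumber system columns and no calculated columns.
dfC = pd.merge(dfC, dfT[['Name', 'Table Type']],
               left_on='Table Name', right_on='Name', how='left')
dfT_filt = dfT[dfT['Table Type'] == 'Table']
dfC_filt = dfC[(dfC['Table Type'] == 'Table')
               & ~dfC['Column Name'].str.startswith('RowNumber-')
               & (dfC['Type'] != 'Calculated')]

print(dfT_filt['Name'].tolist())         # ['Sales', 'Calendar']
print(dfC_filt['Column Name'].tolist())  # ['Sales Amount']
```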
- - try: - tom.model.Tables[tName] - except: - tom.add_table(name = tName, description=tDesc, data_category=tDC, hidden=tHid) - tom.add_entity_partition(table_name = tName, entity_name = tName.replace(' ','_')) - print(f"{green_dot} The '{tName}' table has been added.") - - for i, r in dfC_filt.iterrows(): - tName = r['Table Name'] - cName = r['Column Name'] - scName = r['Source'].replace(' ','_') - cHid = bool(r['Hidden']) - cDataType = r['Data Type'] - - try: - tom.model.Tables[tName].Columns[cName] - except: - tom.add_data_column(table_name=tName, column_name=cName, source_column=scName, hidden=cHid, data_type=cDataType) - print(f"{green_dot} The '{tName}'[{cName}] column has been added.") - - print(f"\n{green_dot} All regular tables and columns have been added to the '{new_dataset}' semantic model.") - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) - else: - print(f"{red_dot} Lakehouse not attached to notebook and lakehouse/lakehouse_workspace are not specified. Please add your lakehouse to this notebook or specify the lakehouse/lakehouse_workspace parameters.") - print(f"To attach a lakehouse to a notebook, go to the the 'Explorer' window to the left, click 'Lakehouses' to add your lakehouse to this notebook") - print(f"\nLearn more here: https://learn.microsoft.com/fabric/data-engineering/lakehouse-notebook-explore#add-or-remove-a-lakehouse") - - - - - \ No newline at end of file diff --git a/sempy_labs/MigrationValidation.py b/sempy_labs/MigrationValidation.py deleted file mode 100644 index 150f7f78..00000000 --- a/sempy_labs/MigrationValidation.py +++ /dev/null @@ -1,133 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import create_relationship_name -from .TOM import connect_semantic_model -from typing import List, Optional, Union -from sempy._utils._log import log - -def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None): - - """ - Shows a list of semantic model objects. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
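list_semantic_model_objects (whose body follows) accumulates its result one row at a time with pd.concat. A self-contained sketch of that accumulation pattern, using invented object names in place of a real TOM walk:

```python
import pandas as pd

# Row-by-row accumulation as used by list_semantic_model_objects: every object
# discovered while walking the model is appended as a one-row dataframe.
df = pd.DataFrame(columns=['Parent Name', 'Object Name', 'Object Type'])

# Invented objects standing in for what the TOM traversal would yield.
discovered = [
    ('Model', 'Sales', 'Table'),
    ('Sales', 'Sales Amount', 'Measure'),
    ('Sales', 'Calendar Hierarchy', 'Hierarchy'),
]

for parent_name, object_name, object_type in discovered:
    new_data = {'Parent Name': parent_name,
                'Object Name': object_name,
                'Object Type': object_type}
    df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)

print(df)
```

For large models, collecting the rows in a plain list and calling pd.DataFrame once at the end is typically faster, but the resulting frame is the same.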
- - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing a list of objects in the semantic model - """ - - if workspace is None: - workspace = fabric.resolve_workspace_name() - - df = pd.DataFrame(columns=['Parent Name', 'Object Name', 'Object Type']) - with connect_semantic_model(dataset=dataset, workspace = workspace, readonly=True) as tom: - for t in tom.model.Tables: - if t.CalculationGroup is not None: - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': 'Calculation Group'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for ci in t.CalculationGroup.CalculationItems: - new_data = {'Parent Name': t.Name, 'Object Name': ci.Name, 'Object Type': str(ci.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - elif any(str(p.SourceType) == 'Calculated' for p in t.Partitions): - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': 'Calculated Table'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - new_data = {'Parent Name': t.Parent.Name, 'Object Name': t.Name, 'Object Type': str(t.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for c in t.Columns: - if str(c.Type) != 'RowNumber': - if str(c.Type) == 'Calculated': - new_data = {'Parent Name': c.Parent.Name, 'Object Name': c.Name, 'Object Type': 'Calculated Column'} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - new_data = {'Parent Name': c.Parent.Name, 'Object Name': c.Name, 'Object Type': str(c.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for m in t.Measures: - new_data = {'Parent Name': m.Parent.Name, 'Object Name': m.Name, 'Object Type': str(m.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for h in t.Hierarchies: - new_data = {'Parent Name': h.Parent.Name, 'Object Name': h.Name, 'Object Type': str(h.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for l in h.Levels: - new_data = {'Parent Name': l.Parent.Name, 'Object Name': l.Name, 'Object Type': str(l.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for p in t.Partitions: - new_data = {'Parent Name': p.Parent.Name, 'Object Name': p.Name, 'Object Type': str(p.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for r in tom.model.Relationships: - rName = create_relationship_name(r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name) - new_data = {'Parent Name': r.Parent.Name, 'Object Name': rName, 'Object Type': str(r.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for role in tom.model.Roles: - new_data = {'Parent Name': role.Parent.Name, 'Object Name': role.Name, 'Object Type': str(role.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for rls in role.TablePermissions: - new_data = {'Parent Name': role.Name, 'Object Name': rls.Name, 'Object Type': str(rls.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for tr in tom.model.Cultures: - new_data = {'Parent Name': tr.Parent.Name, 'Object Name': tr.Name, 'Object Type': str(tr.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for per in tom.model.Perspectives: - new_data = {'Parent Name': 
per.Parent.Name, 'Object Name': per.Name, 'Object Type': str(per.ObjectType)} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - -@log -def migration_validation(dataset: str, new_dataset: str, workspace: Optional[str] = None, new_dataset_workspace: Optional[str] = None): - - """ - Shows the objects in the original semantic model and whether then were migrated successfully or not. - - Parameters - ---------- - dataset : str - Name of the import/DirectQuery semantic model. - new_dataset : str - Name of the Direct Lake semantic model. - workspace : str, default=None - The Fabric workspace name in which the import/DirectQuery semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - new_dataset_workspace : str - The Fabric workspace name in which the Direct Lake semantic model will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. - """ - - dfA = list_semantic_model_objects(dataset = dataset, workspace = workspace) - dfB = list_semantic_model_objects(dataset = new_dataset, workspace = new_dataset_workspace) - - def is_migrated(row): - if row['Object Type'] == 'Calculated Table': - return ((dfB['Parent Name'] == row['Parent Name']) & - (dfB['Object Name'] == row['Object Name']) & - (dfB['Object Type'].isin(['Calculated Table', 'Table']))).any() - else: - return ((dfB['Parent Name'] == row['Parent Name']) & - (dfB['Object Name'] == row['Object Name']) & - (dfB['Object Type'] == row['Object Type'])).any() - - dfA['Migrated'] = dfA.apply(is_migrated, axis=1) - - denom = len(dfA) - num = len(dfA[dfA['Migrated']]) - print(f"{100 * round(num / denom,2)}% migrated") - - return dfA \ No newline at end of file diff --git a/sempy_labs/ModelAutoBuild.py b/sempy_labs/ModelAutoBuild.py deleted file mode 100644 index 6497061c..00000000 --- a/sempy_labs/ModelAutoBuild.py +++ /dev/null @@ -1,117 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .TOM import connect_semantic_model -from .CreateBlankSemanticModel import create_blank_semantic_model -from .GetSharedExpression import get_shared_expression -from typing import List, Optional, Union -from sempy._utils._log import log - -@log -def model_auto_build(dataset: str, file_path: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Dynamically generates a semantic model based on an Excel file template. - - Parameters - ---------- - dataset : str - Name of the semantic model. - file_path : str - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. 
- Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if lakehouse_workspace is None: - lakehouse_workspace = workspace - - sheets = ['Model', 'Tables', 'Measures', 'Columns', 'Roles', 'Hierarchies', 'Relationships'] - - create_blank_semantic_model(dataset=dataset, workspace = workspace) - - with connect_semantic_model(dataset = dataset, workspace = workspace) as tom: - - #DL Only - expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace) - tom.add_expression(name = 'DatbaseQuery', expression = expr) - - for sheet in sheets: - df = pd.read_excel(file_path, sheet_name= sheet) - - if sheet == 'Tables': - for i, r in df.iterrows(): - tName = r['Table Name'] - desc = r['Description'] - dc = r['Data Category'] - mode = r['Mode'] - hidden = bool(r['Hidden']) - - tom.add_table(name = tName, description = desc, data_category=dc, hidden = hidden) - if mode == 'DirectLake': - tom.add_entity_partition(table_name = tName, entity_name=tName) - elif sheet == 'Columns': - for i, r in df.iterrows(): - tName = r['Table Name'] - cName = r['Column Name'] - scName = r['Source Column'] - dataType = r['Data Type'] - hidden = bool(r['Hidden']) - key = bool(r['Key']) - if dataType == 'Integer': - dataType = 'Int64' - desc = r['Description'] - - tom.add_data_column( - table_name = tName, column_name=cName, source_column=scName, - data_type=dataType, description = desc, hidden=hidden, key=key) - elif sheet == 'Measures': - for i, r in df.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - expr = r['Expression'] - desc = r['Description'] - format = r['Format String'] - hidden = bool(r['Hidden']) - - tom.add_measure( - table_name = tName, measure_name=mName, - expression=expr, format_string=format, description=desc, hidden=hidden) - elif sheet == 'Relationships': - for i, r in df.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - fromCard = r['From Cardinality'] - toCard = r['To Cardinality'] - - tom.add_relationship( - from_table=fromTable, from_column= fromColumn, - to_table=toTable, to_column = toColumn, - from_cardinality=fromCard, to_cardinality=toCard) - elif sheet == 'Roles': - print('hi') - elif sheet == 'Hierarchies': - print('hi') - - - - - \ No newline at end of file diff --git a/sempy_labs/ModelBPA.py b/sempy_labs/ModelBPA.py deleted file mode 100644 index 89f2ff97..00000000 --- a/sempy_labs/ModelBPA.py +++ /dev/null @@ -1,765 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import re, unicodedata, warnings, datetime -import numpy as np -from IPython.display import display, HTML -from pyspark.sql import SparkSession -from .GetMeasureDependencies import get_measure_dependencies -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name -from .Lakehouse import lakehouse_attached -from .GetLakehouseTables import get_lakehouse_tables -from typing import List, Optional, Union -from sempy._utils._log import log - -def model_bpa_rules(): - - """ - Shows the default rules for the semantic model BPA used by the run_model_bpa function. - - Parameters - ---------- - - - Returns - ------- - pandas.DataFrame - A pandas dataframe containing the default rules for the run_model_bpa function. 
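Each rule in the dataframe that follows pairs a category, scope, and severity with a lambda that is later evaluated against the matching metadata dataframe. A miniature, self-contained illustration of that mechanism (the rule text and column data below are invented for the example):

```python
import pandas as pd

# A miniature version of the rules frame: each row pairs a scope and severity
# with a lambda that is evaluated later against the matching metadata frame.
df_rules = pd.DataFrame([
    ('Performance', 'Column', 'Warning', 'Do not use floating point data types',
     lambda df: df['Data Type'] == 'Double',
     'Use "Int64" or "Decimal" where appropriate.', None),
], columns=['Category', 'Scope', 'Severity', 'Rule Name',
            'Expression', 'Description', 'URL'])

# Invented column metadata standing in for fabric.list_columns output.
dfC = pd.DataFrame({'Column Name': ['Amount', 'Qty'],
                    'Data Type': ['Double', 'Int64']})

rule = df_rules.iloc[0]
violations = dfC[rule['Expression'](dfC)]    # the lambda acts as a boolean mask
print(violations['Column Name'].tolist())    # ['Amount']
```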
- """ - - df_rules = pd.DataFrame([ - ('Performance', 'Column', 'Warning', 'Do not use floating point data types', - lambda df: df['Data Type'] == 'Double', - 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).', - ), - ('Performance', 'Column', 'Warning', 'Avoid using calculated columns', - lambda df: df['Type'] == 'Calculated', - 'Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('Performance', 'Relationship', 'Warning', 'Check if bi-directional and many-to-many relationships are valid', - lambda df: (df['Multiplicity'] == 'm:m') | (df['Cross Filtering Behavior'] == 'BothDirections'), - 'Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.', - 'https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax' - ), - ('Performance', 'Row Level Security', 'Info', 'Check if dynamic row level security (RLS) is necessary', - lambda df: df['Is Dynamic'], - 'Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.', - 'https://docs.microsoft.com/power-bi/admin/service-admin-rls', - ), - ('Performance', 'Table', 'Warning', 'Avoid using many-to-many relationships on tables used for dynamic row level security', - lambda df: (df['Used in M2M Relationship'] == True) & (df['Used in Dynamic RLS'] == True), - "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.", - 'https://www.elegantbi.com/post/dynamicrlspatterns', - ), - ('Performance', 'Relationship', 'Warning', 'Many-to-many relationships should be single-direction', - lambda df: (df['Multiplicity'] == 'm:m') & (df['Cross Filtering Behavior'] == 'BothDirections'), - ), - ('Performance', 'Column', 'Warning', 'Set IsAvailableInMdx to false on non-attribute columns', - lambda df: (df['Is Direct Lake'] == False) & (df['Is Available in MDX'] == True) & ((df['Hidden'] == True) | (df['Parent Is Hidden'] == True)) & (df['Used in Sort By'] == False) & (df['Used in Hierarchy'] == False) & (df['Sort By Column'] == None), - 'To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. 
The IsAvailableInMdx property is not relevant for Direct Lake models.', - 'https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular', - ), - #('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table", - # lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']), - # "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.", - # "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions", - #), - ('Performance', 'Table', 'Warning', "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables", - lambda df: (df['Import Partitions'] == 1) & (df['Model Has DQ']) & (df['Used in Relationship x:1']), - "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting", - - ), - ('Performance', 'Partition', 'Warning', 'Minimize Power Query transformations', - lambda df: (df['Source Type'] == 'M') & (('Table.Combine(\"' in df['Query']) | ('Table.Join(\"' in df['Query']) | ('Table.NestedJoin(\"' in df['Query']) | ('Table.AddColumn(\"' in df['Query']) | ('Table.Group(\"' in df['Query']) | ('Table.Sort(\"' in df['Query']) | ('Table.Sort(\"' in df['Query']) | ('Table.Pivot(\"' in df['Query']) | ('Table.Unpivot(\"' in df['Query']) | ('Table.UnpivotOtherColumns(\"' in df['Query']) | ('Table.Distinct(\"' in df['Query']) | ('[Query=(\"\"SELECT' in df['Query']) | ('Value.NativeQuery' in df['Query']) | ('OleDb.Query' in df['Query']) | ('Odbc.Query' in df['Query']) ), - 'Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.', - 'https://docs.microsoft.com/power-query/power-query-folding', - ), - ('Performance', 'Table', 'Warning', 'Consider a star-schema instead of a snowflake architecture', - lambda df: (df['Type'] != 'Calculation Group') & df['Used in Relationship Both Sides'], - 'Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.', - 'https://docs.microsoft.com/power-bi/guidance/star-schema', - ), - ('Performance', 'Table', 'Warning', 'Reduce usage of calculated tables', - lambda df: df['Type'] == 'Calculated Table', - 'Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.', - ), - ('Performance', 'Column', 'Warning', 'Reduce usage of calculated columns that use the RELATED function', - lambda df: (df['Type'] == 'Calculated') & (df['Source'].str.contains(r'related\s*\(', case=False)), - 'Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. 
One scenario where they may be easier to avoid is if they use the RELATED function.', - 'https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables', - ), - ('Performance', 'Model', 'Warning', 'Avoid excessive bi-directional or many-to-many relationships', - lambda df: (df['M2M or BiDi Relationship Count'] / df['Relationship Count']) > 0.3, - 'Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.', - 'https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax', - ), - ('Performance', 'Column', 'Warning', 'Avoid bi-directional or many-to-many relationships against high-cardinality columns', - lambda df: df['Used in M2M/BiDi Relationship'] & df['Column Cardinality'] > 100000, - 'For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns', - ), - ('Performance', 'Table', 'Warning', 'Remove auto-date table', - lambda df: (df['Type'] == 'Calculated Table') & ( (df['Name'].str.startswith('DateTableTemplate_')) | (df['Name'].str.startswith('LocalDateTable_')) ), - 'Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.', - 'https://www.youtube.com/watch?v=xu3uDEHtCrg', - ), - ('Performance', 'Table', 'Warning', 'Date/calendar tables should be marked as a date table', - lambda df: ( (df['Name'].str.contains(r'date', case=False)) | (df['Name'].str.contains(r'calendar', case=False)) ) & (df['Data Category'] != 'Time'), - "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.", - 'https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables', - ), - ('Performance', 'Table', 'Warning', 'Large tables should be partitioned', - lambda df: (df['Is Direct Lake'] == False) & (df['Partition Count'] == 1) & (df['Row Count'] > 25000000), - 'Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.', - ), - ('Performance', 'Row Level Security', 'Warning', 'Limit row level security (RLS) logic', - lambda df: df['Filter Expression'].str.contains('|'.join(['right', 'left', 'filter', 'upper', 'lower', 'find' ]), case=False), - 'Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).', - ), - ('Performance', 'Model', 'Warning', 'Model should have a date table', - lambda df: df['Has Date Table'], - 'Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.', - ), - ('Performance', 'Measure', 'Warning', 'Measures using time intelligence and model is using Direct Query', - lambda df: df['DQ Date Function Used'], - 'At present, time intelligence functions are known to not perform as well when using Direct Query. If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.', - ), - ('Error Prevention', 'Calculation Item', 'Error', 'Calculation items must have an expression', - lambda df: df['Expression'].str.len() == 0, - 'Calculation items must have an expression. 
Without an expression, they will not show any values.', - ), - ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy', 'Partition'], 'Error', 'Avoid invalid characters in names', - lambda df: df['Name'].apply(lambda x: any(unicodedata.category(char) == 'Cc' and not char.isspace() for char in x)), - 'This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.', - ), - ('Error Prevention', ['Table', 'Column', 'Measure', 'Hierarchy'], 'Error', 'Avoid invalid characters in descriptions', - lambda df: df['Description'].apply(lambda x: any(unicodedata.category(char) == 'Cc' and not char.isspace() for char in x)), - 'This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.', - ), - ('Error Prevention', 'Relationship', 'Warning', 'Relationship columns should be of the same data type', - lambda df: df['From Column Data Type'] != df['To Column Data Type'], - "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.", - ), - ('Error Prevention', 'Column', 'Error', 'Data columns must have a source column', - lambda df: (df['Type'] == 'Data') & (df['Source'].str.len() == 0), - 'Data columns must have a source column. A data column without a source column will cause an error when processing the model.', - ), - ('Error Prevention', 'Column', 'Warning', 'Set IsAvailableInMdx to true on necessary columns', - lambda df: (df['Is Direct Lake'] == False) & (df['Is Available in MDX'] == False) & ((df['Used in Sort By'] == True) | (df['Used in Hierarchy'] == True) | (df['Sort By Column'] != None)), - 'In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.', - ), - ('Error Prevention', 'Table', 'Error', 'Avoid the USERELATIONSHIP function and RLS against the same table', - lambda df: (df['USERELATIONSHIP Used'] == True) & (df['Used in RLS'] == True), - "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.", - 'https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Avoid using the IFERROR function', - lambda df: df['Measure Expression'].str.contains(r'irerror\s*\(', case=False), - 'Avoid using the IFERROR function as it may cause performance degradation. 
If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Use the TREATAS function instead of INTERSECT for virtual relationships', - lambda df: df['Measure Expression'].str.contains(r'intersect\s*\(', case=False), - 'The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virutal relationships.', - 'https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax', - ), - ('DAX Expressions', 'Measure', 'Warning', 'The EVALUATEANDLOG function should not be used in production models', - lambda df: df['Measure Expression'].str.contains(r'evaluateandlog\s*\(', case=False), - 'The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.', - 'https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Measures should not be direct references of other measures', - lambda df: df['Measure Expression'].str.strip().isin(df['Measure Object']), - "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.", - ), - ('DAX Expressions', 'Measure', 'Warning', 'No two measures should have the same definition', - lambda df: df['Measure Expression'].apply(lambda x: re.sub(r'\s+', '', x)).duplicated(keep=False), - 'Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Avoid addition or subtraction of constant values to results of divisions', - lambda df: df["Measure Expression"].str.contains("(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1" or "\\/\\s*.*(?=[-+]\\s*1)", regex=True), - ), - ('DAX Expressions', 'Measure', 'Warning', "Avoid using '1-(x/y)' syntax", - lambda df: df['Measure Expression'].str.contains("[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/" or '[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(', regex=True), - "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed. 
Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )", - ), - ('DAX Expressions', 'Measure', 'Warning', 'Filter measure values by columns, not tables', - lambda df: df['Measure Expression'].str.contains("(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]" or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[", regex=True), - "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)", - 'https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Filter column values with proper syntax', - lambda df: df['Measure Expression'].str.contains("(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]" or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]", regex=True), - "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"", - 'https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax', - ), - ('DAX Expressions', 'Measure', 'Warning', 'Use the DIVIDE function for division', - lambda df: df['Measure Expression'].str.contains("\\]\\s*\\/(?!\\/)(?!\\*)\" or \"\\)\\s*\\/(?!\\/)(?!\\*)",regex=True), - 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.', - 'https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator', - ), - ('DAX Expressions', 'Measure', 'Error', 'Column references should be fully qualified', - lambda df: df['Has Unqualified Column Reference'], - 'Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Measure', 'Error', 'Measure references should be unqualified', - lambda df: df['Has Fully Qualified Measure Reference'], - 'Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. 
Use only the measure name in square brackets.', - 'https://www.elegantbi.com/post/top10bestpractices', - ), - ('DAX Expressions', 'Relationship', 'Warning', 'Inactive relationships that are never activated', - lambda df: df['Inactive without USERELATIONSHIP'], - 'Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.', - 'https://dax.guide/userelationship', - ), - ('Maintenance', 'Column', 'Warning', 'Remove unnecessary columns', - lambda df: (df['Hidden'] | df['Parent Is Hidden']) & ~ df['Used in Relationship'] & ~ df['Used in Sort By'] & ~ df['Used in Hierarchy'] & (df['Referenced By'] == 0) & ~ (df['Used in RLS']), # usedInOLS - 'Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.', - ), - ('Maintenance', 'Measure', 'Warning', 'Remove unnecessary measures', - lambda df: df['Measure Hidden'] & (df['Referenced By'] == 0), - 'Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.', - ), - #('Maintenance', 'Role', 'Warning', 'Remove roles with no members', - # lambda df: df['Member Count'] == 0, - #), - ('Maintenance', 'Table', 'Warning', 'Ensure tables have relationships', - lambda df: (df['Used in Relationship'] == False) & (df['Type'] != 'Calculation Group'), - 'This rule highlights tables which are not connected to any other table in the model with a relationship.', - ), - ('Maintenance', 'Table', 'Warning', 'Calculation groups with no calculation items', - lambda df: (df['Type'] == 'Calculation Group') & (df['Has Calculation Items']), - ), - ('Maintenance', 'Column', 'Info', 'Visible objects with no description', - lambda df: (df['Hidden'] == False) & (df['Description'].str.len() == 0), - 'Calculation groups have no function unless they have calculation items.', - ), - ('Formatting', 'Column', 'Warning', "Provide format string for 'Date' columns", - lambda df: (df['Column Name'].str.contains(r'date', case=False)) & (df['Data Type'] == 'DateTime') & (df['Format String'] != 'mm/dd/yyyy'), - 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".', - ), - ('Formatting', 'Column', 'Warning', 'Do not summarize numeric columns', - lambda df: ((df['Data Type'] == 'Int64') | (df['Data Type'] == 'Decimal') | (df['Data Type'] == 'Double')) & (df['Summarize By'] != 'None') & ~ ((df['Hidden']) | (df['Parent Is Hidden']) ), - 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).', - ), - ('Formatting', 'Measure', 'Info', 'Provide format string for measures', - lambda df: ~ ((df['Measure Hidden']) | (df['Parent Is Hidden'])) & (df['Format String'].str.len() == 0), - 'Visible measures should have their format string property assigned.', - ), - ('Formatting', 'Column', 'Info', 'Add data category for columns', - lambda df: (df['Data Category'] == '') & ((((df['Column Name'].str.contains(r'country', case=False)) | (df['Column Name'].str.contains(r'city', case=False)) | (df['Column Name'].str.contains(r'continent', case=False))) & (df['Data Type'] == 'String')) | (((df['Column Name'].str.contains(r'latitude', case=False)) | (df['Column Name'].str.contains(r'longitude', 
case=False))) & (df['Data Type'] == 'String')) ), - 'Add Data Category property for appropriate columns.', - 'https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization', - ), - ('Formatting', 'Measure', 'Warning', 'Percentages should be formatted with thousands separators and 1 decimal', - lambda df: (df['Format String'].str.contains('%')) & (df['Format String'] != '#,0.0%;-#,0.0%;#,0.0%'), - ), - ('Formatting', 'Measure', 'Warning', 'Whole numbers should be formatted with thousands separators and no decimals', - lambda df: (~ df['Format String'].str.contains('$')) & ~ (df['Format String'].str.contains('%')) & ~ ((df['Format String'] == '#,0') | (df['Format String'] == '#,0.0')), - ), - ('Formatting', 'Column', 'Info', 'Hide foreign keys', - lambda df: (df['Foreign Key']) & (df['Hidden'] == False), - 'Foreign keys should always be hidden.', - ), - ('Formatting', 'Column', 'Info', 'Mark primary keys', - lambda df: (df['Primary Key']) & (df['Key'] == False), - "Set the 'Key' property to 'True' for primary key columns within the column properties.", - ), - ('Formatting', 'Column', 'Info', 'Month (as a string) must be sorted', - lambda df: (df['Column Name'].str.contains(r'month', case=False)) & ~ (df['Column Name'].str.contains(r'months', case=False)) & (df['Data Type'] == 'String') & (df['Sort By Column'] == ''), - 'This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).', - ), - ('Formatting', 'Relationship', 'Warning', 'Relationship columns should be of integer data type', - lambda df: (df['From Column Data Type'] != 'Int64') | (df['To Column Data Type'] != 'Int64'), - 'It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.', - ), - ('Formatting', 'Column', 'Warning', 'Provide format string for "Month" columns', - lambda df: (df['Column Name'].str.contains(r'month', case=False)) & (df['Data Type'] == 'DateTime') & (df['Format String'] != 'MMMM yyyy'), - 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".', - ), - ('Formatting', 'Column', 'Info', 'Format flag columns as Yes/No value strings', - lambda df: ( df['Column Name'].str.startswith("Is") & (df['Data Type'] == "Int64") & ~ (df['Hidden'] | df['Parent Is Hidden']) ) | ( df['Column Name'].str.endswith(" Flag") & (df['Data Type'] != "String") & ~ (df['Hidden'] | df['Parent Is Hidden']) ), - 'Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.', - ), - #('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space', - # lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '), - # 'Objects should not start or end with a space. 
This usually happens by accident and is difficult to find.', - #), - ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Info', 'First letter of objects must be capitalized', - lambda df: df['Name'].str[0].str.upper() != df['Name'].str[0], - 'The first letter of object names should be capitalized to maintain professional quality.', - ), - ('Naming Conventions', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Warning', 'Object names must not contain special characters', - lambda df: df['Name'].str.contains(r'[\t\r\n]'), - 'Object names should not include tabs, line breaks, etc.', - )#, - #('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names', - # lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(), - #)#, - - ], columns=['Category', 'Scope', 'Severity', 'Rule Name', 'Expression', 'Description', 'URL']) - - df_rules['Severity'] = df_rules['Severity'].replace('Warning', '⚠️').replace('Error', '\u274C').replace('Info', 'ℹ️') - - pd.set_option('display.max_colwidth', 1000) - - return df_rules - -@log -def run_model_bpa(dataset: str, rules_dataframe: Optional[pd.DataFrame] = None, workspace: Optional[str] = None, export: Optional[bool] = False, return_dataframe: Optional[bool] = False, **kwargs): - - """ - Displays an HTML visualization of the results of the Best Practice Analyzer scan for a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - rules_dataframe : pandas.DataFrame, default=None - A pandas dataframe containing rules to be evaluated. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - export : bool, default=False - If True, exports the resulting dataframe to a delta table in the lakehouse attached to the notebook. - return_dataframe : bool, default=False - If True, returns a pandas dataframe instead of the visualization. - - Returns - ------- - pandas.DataFrame - A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules. - """ - - if 'extend' in kwargs: - print("The 'extend' parameter has been deprecated. 
Please remove this parameter from the function going forward.") - del kwargs['extend'] - - warnings.filterwarnings("ignore", message="This pattern is interpreted as a regular expression, and has match groups.") - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if rules_dataframe is None: - rules_dataframe = model_bpa_rules() - - dfT = fabric.list_tables(dataset = dataset, workspace = workspace, extended=True) - dfT = dfT.drop_duplicates() - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended=True, additional_xmla_properties=['Parent.DataCategory', 'Parent.IsHidden']) - dfC = dfC[~dfC['Column Name'].str.startswith('RowNumber-')] - - dfM = fabric.list_measures(dataset = dataset, workspace = workspace, additional_xmla_properties=['Parent.IsHidden']) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace, additional_xmla_properties=['FromCardinality', 'ToCardinality']) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, additional_xmla_properties=['DataCoverageDefinition.Expression']) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfRole = fabric.get_roles(dataset = dataset, workspace = workspace) - dfRM = fabric.get_roles(dataset = dataset, workspace = workspace, include_members=True) - dfRLS = fabric.get_row_level_security_permissions(dataset = dataset, workspace = workspace) - #dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName) - #dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName) - dfCI = fabric.list_calculation_items(dataset = dataset, workspace = workspace) - #dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName) - #dfPersp = fabric.list_perspectives(dataset = datasetName, workspace = workspaceName) - dfD = fabric.list_datasets(mode = 'rest', workspace = workspace) - dfD = dfD[dfD['Dataset Name'] == dataset] - #datasetOwner = dfD['Configured By'].iloc[0] - md = get_measure_dependencies(dataset, workspace) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - dfC['Is Direct Lake'] = isDirectLake - dfT['Is Direct Lake'] = isDirectLake - - cols = ['From Cardinality', 'To Cardinality'] - - for col in cols: - if not col in dfR: - dfR[col] = None - - cols = ['Parent Is Hidden'] - - for col in cols: - if not col in dfM: - dfM[col] = None - - # Data Coverage Definition rule - dfP_imp = dfP[dfP['Mode'] == 'Import'] - dfTP = dfP_imp.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Import Partitions'}, inplace=True) - dfP = pd.merge(dfP, dfTP[['Table Name', 'Import Partitions']], on = 'Table Name', how = 'left') - dfP['Import Partitions'].fillna(0, inplace=True) - dfC_DateKey = dfC[(dfC['Parent Data Category'] == 'Time') & (dfC['Data Type'] == 'DateTime') & (dfC['Key'])] - hasDateTable = False - - if len(dfC_DateKey) > 0: - hasDateTable = True - - dfP['Has Date Table'] = hasDateTable - - # Set dims to dual mode - dfR_one = dfR[dfR['To Cardinality'] == 'One'] - dfTP = dfP_imp.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Import Partitions'}, inplace=True) - dfT = pd.merge(dfT, dfTP, left_on = 'Name', right_on = 'Table Name', how='left') - dfT.drop(columns=['Table Name'], inplace=True) - dfT['Import Partitions'].fillna(0, inplace=True) - hasDQ = any(r['Mode'] == 'DirectQuery' for i, r in dfP.iterrows()) - 
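Several of the checks further down depend on parsing column and measure references out of DAX expressions with a regular expression. A simplified, standalone illustration follows; the pattern here is deliberately reduced and is not the exact one used in the file:

```python
import re

# Simplified pattern for pulling 'Table'[Column] and [Measure] references out
# of a DAX expression (the pattern in the file itself is more permissive).
pattern = r"'[^']+'\[[^\[\]]+\]|[A-Za-z0-9_]+\[[^\[\]]+\]|\[[^\[\]]+\]"

expr = "DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - [Total Cost], [Total Sales] )"
matches = re.findall(pattern, expr)
print(matches)  # ["'Sales'[SalesAmount]", '[Total Cost]', '[Total Sales]']

# References starting with '[' are unqualified (measure-style); the others are
# fully qualified column references -- the distinction relied on by the BPA
# rules about qualified column and unqualified measure references.
unqualified = [m for m in matches if m.startswith('[')]
qualified = [m for m in matches if not m.startswith('[')]
print(qualified, unqualified)
```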
dfT['Model Has DQ'] = hasDQ - dfT['Used in Relationship x:1'] = dfT['Name'].isin(dfR_one['To Table']) - - dfF = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ - SELECT [FUNCTION_NAME] - FROM $SYSTEM.MDSCHEMA_FUNCTIONS - WHERE [INTERFACE_NAME] = 'DATETIME' - """) - - dfC['Name'] = dfC['Column Name'] - dfH['Name'] = dfH['Hierarchy Name'] - dfM['Name'] = dfM['Measure Name'] - dfP['Name'] = dfP['Partition Name'] - dfRole['Name'] = dfRole['Role'] - dfD['Name'] = dfD['Dataset Name'] - dfH['Description'] = dfH['Hierarchy Description'] - dfM['Description'] = dfM['Measure Description'] - dfH['Hierarchy Object'] = format_dax_object_name(dfH['Table Name'], dfH['Hierarchy Name']) - - dfCI['Calculation Object'] = format_dax_object_name(dfCI['Calculation Group Name'], dfCI['Calculation Item Name']) - - dfRole['Member Count'] = dfRM['Role'].isin(dfRole['Role']).sum() - dfRLS['Is Dynamic'] = dfRLS['Filter Expression'].str.contains(r'userprincipalname\s*\(', case=False) | dfRLS['Filter Expression'].str.contains(r'username\s*\(', case=False) - - # Partition Count - partition_count = dfP.groupby('Table Name').size().reset_index(name='Partition Count') - dfT = pd.merge(dfT, partition_count, left_on='Name', right_on='Table Name', how='left').drop('Table Name', axis=1) - dfT['Partition Count'] = dfT['Partition Count'].fillna(0).astype(int) - - dfT = dfT.merge(dfP[['Table Name', 'Partition Name']], how='left', left_on='Name', right_on='Table Name') - dfT['First Partition Name'] = dfT.groupby('Name')['Partition Name'].transform('first') - dfT.drop('Table Name', axis=1, inplace=True) - - dfC['Sort By Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Sort By Column']) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfM['Measure Object'] = "[" + dfM['Measure Name'] + "]" - dfM['Measure Fully Qualified'] = format_dax_object_name(dfM['Table Name'], dfM['Measure Name']) - dfM['Measure Fully Qualified No Spaces'] = dfM['Table Name'] + '[' + dfM['Measure Name'] + ']' - #dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) - dfC['Column Unqualified'] = "[" + dfC['Column Name'] + "]" - dfC['Column Object No Spaces'] = dfC.apply(lambda row: row['Table Name'] + '[' + row['Column Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) - dfC['Used in Sort By'] = dfC['Column Object'].isin(dfC['Sort By Column Object']) - dfH['Column Object'] = format_dax_object_name(dfH['Table Name'], dfH['Column Name']) - dfC['Used in Hierarchy'] = dfC['Column Object'].isin(dfH['Column Object']) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfT['Used in Relationship'] = dfT['Name'].isin(dfR['From Table']) | dfT['Name'].isin(dfR['To Table']) - dfT['Used in Relationship Both Sides'] = dfT['Name'].isin(dfR['From Table']) & dfT['Name'].isin(dfR['To Table']) - dfC['Used in Relationship'] = dfC['Column Object'].isin(dfR['From Object']) | dfC['Column Object'].isin(dfR['To Object']) - - dfR_filt = dfR[(dfR['Cross Filtering Behavior'] == 'BothDirections') | (dfR['Multiplicity'] == 'm:m')] - dfC['Used in M2M/BiDi Relationship'] = dfC['Column Object'].isin(dfR_filt['From Object']) | dfC['Column Object'].isin(dfR_filt['To Object']) - dfC['Foreign Key'] = dfC['Column Object'].isin(dfR[dfR['From Cardinality'] == 'Many']['From 
Object']) - dfC['Primary Key'] = dfC['Column Object'].isin(dfR[dfR['To Cardinality'] == 'One']['To Object']) - dfT['Used in M2M Relationship'] = dfT['Name'].isin(dfR[dfR['Multiplicity'] == 'm:m'][['From Table']]) | dfT['Name'].isin(dfR[dfR['Multiplicity'] == 'm:m'][['To Table']]) - dfT['Used in Dynamic RLS'] = dfT['Name'].isin(dfRLS[dfRLS['Is Dynamic']]['Table']) - dfT['Used in RLS'] = dfT['Name'].isin(dfRLS.loc[dfRLS['Filter Expression'].str.len() > 0, 'Table']) - dfC['Primary Key'] = dfC['Column Object'].isin(dfR.loc[dfR['To Cardinality'] == 'One', 'To Object']) - dfD['Has Date Table'] = any((r['Parent Data Category'] == 'Time') & (r['Data Type'] == 'DateTime') & (r['Key'] == True) for i, r in dfC.iterrows()) - #dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name']) - dfD['Relationship Count'] = len(dfR) - dfD['M2M or BiDi Relationship Count'] = len(dfR[(dfR['Multiplicity'] == 'm:m') | (dfR['Cross Filtering Behavior'] == 'BothDirections')]) - dfD['Calculation Group Count'] = len(dfT[dfT['Type'] == 'Calculation Group']) - dfT['Has Calculation Items'] = np.where((dfT['Type'] == 'Calculation Group') & dfT['Name'].isin(dfCI['Calculation Group Name']), True, False) - dfP['Partition Object'] = format_dax_object_name(dfP['Table Name'], dfP['Partition Name']) - dfRLS['RLS Object'] = format_dax_object_name(dfRLS['Role'], dfRLS['Table']) - - function_pattern = '|'.join(dfF['FUNCTION_NAME'].map(re.escape)) - - dfM['DQ Date Function Used'] = any(dfP['Mode'] == 'DirectQuery') & dfM['Measure Expression'].str.contains(f'({function_pattern})\\s*\\(', case=False, regex=True) - - md['Reference'] = "'" + md['Referenced Table'] + "'[" + md['Referenced Object'] + ']' - - dfC['Referenced By'] = md[(md['Referenced Object Type'] == 'Column') & (md['Reference'].isin(dfC['Column Object']))].groupby('Reference').size().reset_index(name='Count')['Count'] - dfC['Referenced By'].fillna(0, inplace=True) - dfC['Referenced By'] = dfC['Referenced By'].fillna(0).astype(int) - - dfM['Referenced By'] = md[(md['Referenced Object Type'] == 'Measure') & (md['Referenced Object'].isin(dfM['Measure Name']))].groupby('Referenced Object').size().reset_index(name='Count')['Count'] - dfM['Referenced By'].fillna(0, inplace=True) - dfM['Referenced By'] = dfM['Referenced By'].fillna(0).astype(int) - - pattern = "[^\( ][a-zA-Z0-9_()-]+\[[^\[]+\]|'[^']+'\[[^\[]+\]|\[[^\[]+\]" - - dfM['Has Fully Qualified Measure Reference'] = False - dfM['Has Unqualified Column Reference'] = False - - for i, r in dfM.iterrows(): - tName = r['Table Name'] - mName = r['Measure Name'] - expr = r['Measure Expression'] - - matches = re.findall(pattern, expr) - - for m in matches: - if m[0] == '[': - if (m in dfC['Column Unqualified'].values) and (dfC[dfC['Table Name'] == tName]['Column Unqualified'] == m).any(): - dfM.at[i, 'Has Unqualified Column Reference'] = True - else: - if (m in dfM['Measure Fully Qualified'].values) | (m in dfM['Measure Fully Qualified No Spaces'].values): - dfM.at[i, 'Has Fully Qualified Measure Reference'] = True - - dfR['Inactive without USERELATIONSHIP'] = False - for i,r in dfR[dfR['Active'] == False].iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable = r['To Table'] - toColumn = r['To Column'] - - dfM_filt = dfM[dfM['Measure Expression'].str.contains("(?i)USERELATIONSHIP\s*\(\s*\'*" + fromTable + "\'*\[" + fromColumn + "\]\s*,\s*\'*" + toTable + "\'*\[" + toColumn + "\]" , regex=True)] - if len(dfM_filt) == 0: - dfR.at[i, 'Inactive without USERELATIONSHIP'] = 
True - - dfC['Used in RLS'] = ( - dfC['Column Object No Spaces'].isin(dfRLS['Filter Expression']) | - dfC['Column Object'].isin(dfRLS['Filter Expression']) | - dfC.apply(lambda row: any(row['Column Name'] in expr for expr in dfRLS.loc[dfRLS['Table'] == row['Table Name'], 'Filter Expression']), axis=1) - ) - - # Merge dfR and dfC based on 'From Object' and 'Column Object' - merged_from = pd.merge(dfR, dfC, left_on='From Object', right_on='Column Object', how='left') - merged_to = pd.merge(dfR, dfC, left_on='To Object', right_on='Column Object', how='left') - - dfR['From Column Data Type'] = merged_from['Data Type'] - dfR['To Column Data Type'] = merged_to['Data Type'] - - # Check if USERELATIONSHIP objects are used in a given column, table - userelationship_pattern = re.compile(r"USERELATIONSHIP\s*\(\s*(.*?)\s*,\s*(.*?)\s*\)", re.DOTALL | re.IGNORECASE) - - # Function to extract objects within USERELATIONSHIP function - def extract_objects(measure_expression): - matches = userelationship_pattern.findall(measure_expression) - if matches: - return [obj.strip() for match in matches for obj in match] - else: - return [] - - dfM['USERELATIONSHIP Objects'] = dfM['Measure Expression'].apply(extract_objects) - flat_object_list = [item for sublist in dfM['USERELATIONSHIP Objects'] for item in sublist] - dfC['USERELATIONSHIP Used'] = dfC['Column Object'].isin(flat_object_list) | dfC['Column Object No Spaces'].isin(flat_object_list) - dfT['USERELATIONSHIP Used'] = dfT['Name'].isin(dfC[dfC['USERELATIONSHIP Used']]['Table Name']) - dfR['Relationship Name'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) + ' -> ' + format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfH = dfH[['Name', 'Description', 'Table Name', 'Hierarchy Name', 'Hierarchy Description', 'Hierarchy Object']].drop_duplicates() - - scope_to_dataframe = { - 'Table': (dfT, ['Name']), - 'Partition': (dfP, ['Partition Object']), - 'Column': (dfC, ['Column Object']), - 'Hierarchy': (dfH, ['Hierarchy Object']), - 'Measure': (dfM, ['Measure Name']), - 'Calculation Item': (dfCI, ['Calculation Object']), - 'Relationship': (dfR, ['Relationship Name']), - 'Row Level Security': (dfRLS, ['RLS Object']), - 'Role': (dfRole, ['Role']), - 'Model': (dfD, ['Dataset Name']) - } - - def execute_rule(row): - scopes = row['Scope'] - - # support both str and list as scope type - if isinstance(scopes, str): - scopes = [scopes] - - # collect output dataframes - df_outputs = [] - - for scope in scopes: - # common fields for each scope - (df, violation_cols_or_func) = scope_to_dataframe[scope] - - if scope in ['Hierarchy', 'Measure'] and len(df) == 0: - continue - # execute rule and subset df - df_violations = df[row['Expression'](df)] - - # subset the right output columns (e.g. 
Table Name & Column Name) - if isinstance(violation_cols_or_func, list): - violation_func = lambda violations: violations[violation_cols_or_func] - else: - violation_func = violation_cols_or_func - - # build output data frame - df_output = violation_func(df_violations).copy() - - df_output.columns = ['Object Name'] - df_output['Rule Name'] = row['Rule Name'] - df_output['Category'] = row['Category'] - - df_output['Object Type'] = scope - df_output['Severity'] = row['Severity'] - df_output['Description'] = row['Description'] - df_output['URL'] = row['URL'] - - df_outputs.append(df_output) - - return df_outputs - - # flatten list of lists - flatten_dfs = [ - df - for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() - for df in dfs] - - finalDF = pd.concat(flatten_dfs, ignore_index=True) - - if export: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") - return - dfExport = finalDF.copy() - delta_table_name = "modelbparesults" - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) - - lakeT = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeT_filt = lakeT[lakeT['Table Name'] == delta_table_name] - - dfExport['Severity'].replace('⚠️', 'Warning', inplace=True) - dfExport['Severity'].replace('\u274C', 'Error', inplace=True) - dfExport['Severity'].replace('ℹ️', 'Info', inplace=True) - - spark = SparkSession.builder.getOrCreate() - query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}" - - if len(lakeT_filt) == 0: - runId = 1 - else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 - - now = datetime.datetime.now() - dfExport['Workspace Name'] = workspace - dfExport['Dataset Name'] = dataset - dfExport['Timestamp'] = now - dfExport['RunId'] = runId - - dfExport['RunId'] = dfExport['RunId'].astype('int') - - colName = 'Workspace Name' - dfExport.insert(0, colName, dfExport.pop(colName)) - colName = 'Dataset Name' - dfExport.insert(1, colName, dfExport.pop(colName)) - - dfExport.columns = dfExport.columns.str.replace(' ', '_') - spark_df = spark.createDataFrame(dfExport) - spark_df.write.mode('append').format('delta').saveAsTable(delta_table_name) - print(f"\u2022 Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table.") - - if return_dataframe: - return finalDF - - pd.set_option('display.max_colwidth', 100) - - finalDF = (finalDF[['Category', 'Rule Name', 'Object Type', 'Object Name' , 'Severity', 'Description', 'URL']] - .sort_values(['Category', 'Rule Name', 'Object Type', 'Object Name']) - .set_index(['Category', 'Rule Name'])) - - bpa2 = finalDF.reset_index() - bpa_dict = { - cat: bpa2[bpa2['Category'] == cat].drop("Category", axis=1) - for cat in bpa2['Category'].drop_duplicates().values - } - - styles = """ - - """ - - # JavaScript for tab functionality - script = """ - - """ - - # JavaScript for dynamic tooltip positioning - dynamic_script = """ - - """ - - # HTML for tabs - tab_html = '
' - content_html = '' - for i, (title, df) in enumerate(bpa_dict.items()): - if df.shape[0] == 0: - continue - - tab_id = f"tab{i}" - active_class = '' - if i == 0: - active_class = 'active' - - summary = " + ".join([f'{idx} ({v})' for idx, v in df['Severity'].value_counts().items()]) - tab_html += f'' - content_html += f'
' - - # Adding tooltip for Rule Name using Description column - content_html += '' - content_html += '' - for _, row in df.iterrows(): - content_html += f'' - if pd.notnull(row["URL"]): - content_html += f'' - elif pd.notnull(row['Description']): - content_html += f'' - else: - content_html += f'' - content_html += f'' - content_html += f'' - content_html += f'' - content_html += f'' - content_html += '
Rule NameObject TypeObject NameSeverity
{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Object Type"]}{row["Object Name"]}{row["Severity"]}
' - - content_html += '
' - tab_html += '
' - - # Display the tabs, tab contents, and run the script - return display(HTML(styles + tab_html + content_html + script)) \ No newline at end of file diff --git a/sempy_labs/OneLakeIntegration.py b/sempy_labs/OneLakeIntegration.py deleted file mode 100644 index cd09c1fc..00000000 --- a/sempy_labs/OneLakeIntegration.py +++ /dev/null @@ -1,126 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from typing import List, Optional, Union -from sempy._utils._log import log - -@log -def export_model_to_onelake(dataset: str, workspace: Optional[str] = None, destination_lakehouse: Optional[str] = None, destination_workspace: Optional[str] = None): - - """ - Exports a semantic model's tables to delta tables in the lakehouse. Creates shortcuts to the tables if a lakehouse is specified. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - destination_lakehouse : str, default=None - The name of the Fabric lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created. - destination_workspace : str, default=None - The name of the Fabric workspace in which the lakehouse resides. - - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if destination_workspace == None: - destination_workspace = workspace - destination_workspace_id = workspace_id - else: - destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) - - dfD = fabric.list_datasets(workspace = workspace) - dfD_filt = dfD[dfD['Dataset Name'] == dataset] - - if len(dfD_filt) == 0: - print(f"The '{dataset}' semantic model does not exist in the '{workspace}' workspace.") - return - - tmsl = f""" - {{ - 'export': {{ - 'layout': 'delta', - 'type': 'full', - 'objects': [ - {{ - 'database': '{dataset}' - }} - ] - }} - }} - """ - - # Export model's tables as delta tables - try: - fabric.execute_tmsl(script = tmsl, workspace = workspace) - print(f"The '{dataset}' semantic model's tables have been exported as delta tables to the '{workspace}' workspace.\n") - except: - print(f"ERROR: The '{dataset}' semantic model's tables have not been exported as delta tables to the '{workspace}' workspace.") - print(f"Make sure you enable OneLake integration for the '{dataset}' semantic model. Follow the instructions here: https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration") - return - - # Create shortcuts if destination lakehouse is specified - if destination_lakehouse is not None: - # Destination... 
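# Resolve the destination lakehouse: reuse it if it already exists in the destination
# workspace, otherwise create it, and capture its Id for the shortcut requests below.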
- dfI_Dest = fabric.list_items(workspace = destination_workspace, type = 'Lakehouse') - dfI_filt = dfI_Dest[(dfI_Dest['Display Name'] == destination_lakehouse)] - - if len(dfI_filt) == 0: - print(f"The '{destination_lakehouse}' lakehouse does not exist within the '{destination_workspace}' workspace.") - # Create lakehouse - destination_lakehouse_id = fabric.create_lakehouse(display_name = destination_lakehouse, workspace = destination_workspace) - print(f"The '{destination_lakehouse}' lakehouse has been created within the '{destination_workspace}' workspace.\n") - else: - destination_lakehouse_id = dfI_filt['Id'].iloc[0] - - # Source... - dfI_Source = fabric.list_items(workspace = workspace, type = 'SemanticModel') - dfI_filtSource = dfI_Source[(dfI_Source['Display Name'] == dataset)] - sourceLakehouseId = dfI_filtSource['Id'].iloc[0] - - # Valid tables - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, additional_xmla_properties=['Parent.SystemManaged']) - dfP_filt = dfP[(dfP['Mode'] == 'Import') & (dfP['Source Type'] != 'CalculationGroup') & (dfP['Parent System Managed'] == False)] - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - tmc = pd.DataFrame(dfP.groupby('Table Name')['Mode'].nunique()).reset_index() - oneMode = tmc[tmc['Mode'] == 1] - tableAll = dfP_filt[dfP_filt['Table Name'].isin(dfC['Table Name'].values) & (dfP_filt['Table Name'].isin(oneMode['Table Name'].values))] - tables = tableAll['Table Name'].unique() - - client = fabric.FabricRestClient() - - print("Creating shortcuts...\n") - for tableName in tables: - tablePath = 'Tables/' + tableName - shortcutName = tableName.replace(' ','') - request_body = { - "path": 'Tables', - "name": shortcutName, - "target": { - "oneLake": { - "workspaceId": workspace_id, - "itemId": sourceLakehouseId, - "path": tablePath} - } - } - - try: - response = client.post(f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts",json=request_body) - if response.status_code == 201: - print(f"\u2022 The shortcut '{shortcutName}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace}' workspace. It is based on the '{tableName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.\n") - else: - print(response.status_code) - except: - print(f"ERROR: Failed to create a shortcut for the '{tableName}' table.") \ No newline at end of file diff --git a/sempy_labs/RefreshCalcTables.py b/sempy_labs/RefreshCalcTables.py deleted file mode 100644 index 010e9a0c..00000000 --- a/sempy_labs/RefreshCalcTables.py +++ /dev/null @@ -1,110 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import re, datetime, time -from pyspark.sql import SparkSession -from .TOM import connect_semantic_model -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): - - """ - Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - - spark = SparkSession.builder.getOrCreate() - - start_time = datetime.datetime.now() - timeout = datetime.timedelta(minutes=1) - success = False - - while not success: - try: - with connect_semantic_model(dataset=dataset, readonly=True, workspace=workspace) as tom: - success = True - for a in tom.model.Annotations: - if any(a.Name == t.Name for t in tom.model.Tables): - tName = a.Name - query = a.Value - - if not query.startswith('EVALUATE'): - daxquery = 'EVALUATE \n' + query - else: - daxquery = query - - try: - df = fabric.evaluate_dax(dataset = dataset, dax_string = daxquery, workspace = workspace) - - # Update column names for non-field parameters - if query.find('NAMEOF') == -1: - for old_column_name in df.columns: - pattern = r"\[([^\]]+)\]" - - matches = re.findall(pattern, old_column_name) - new_column_name = matches[0] - new_column_name = new_column_name.replace(' ','') - - df.rename(columns={old_column_name: new_column_name}, inplace=True) - - # Update data types for lakehouse columns - dataType = next(str(c.DataType) for c in tom.all_columns() if c.Parent.Name == tName and c.SourceColumn == new_column_name) - #dfC_type = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == new_column_name)] - #dataType = dfC_type['Data Type'].iloc[0] - - if dataType == 'Int64': - df[new_column_name] = df[new_column_name].astype(int) - elif dataType in ['Decimal', 'Double']: - df[new_column_name] = df[new_column_name].astype(float) - elif dataType == 'Boolean': - df[new_column_name] = df[new_column_name].astype(bool) - elif dataType == 'DateTime': - df[new_column_name] = pd.to_datetime(df[new_column_name]) - else: - df[new_column_name] = df[new_column_name].astype(str) - #else: - # second_column_name = df.columns[1] - # third_column_name = df.columns[2] - # df[third_column_name] = df[third_column_name].astype(int) - - # Remove calc columns from field parameters - # mask = df[second_column_name].isin(dfC_filt['Full Column Name']) - # df = df[~mask] - - delta_table_name = tName.replace(' ','_') - print(f"{in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress...") - - spark_df = spark.createDataFrame(df) - spark_df.write.mode('overwrite').format('delta').saveAsTable(delta_table_name) - print(f"{green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse.") - except: - print(f"{red_dot} Failed to create calculated table '{tName}' as a delta table in the lakehouse.") - - except Exception as e: - if datetime.datetime.now() - start_time > timeout: - break - time.sleep(1) \ No newline at end of file diff --git a/sempy_labs/ReportFunctions.py b/sempy_labs/ReportFunctions.py deleted file mode 100644 index 6c6d3b52..00000000 --- a/sempy_labs/ReportFunctions.py +++ /dev/null @@ -1,742 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import json, os, time, base64, copy, re -from anytree import Node, RenderTree -from powerbiclient import Report -from synapse.ml.services import Translate -from pyspark.sql.functions import col, flatten -from pyspark.sql import SparkSession -from .GenerateReport import update_report_from_reportjson -from .Translations import language_validate -from .Lakehouse import lakehouse_attached -from .HelperFunctions import generate_embedded_filter, resolve_dataset_name, resolve_report_id, resolve_lakehouse_name 
-from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def get_report_json(report: str, workspace: Optional[str] = None, save_to_file_name: Optional[str] = None): - - """ - Gets the report.json file content of a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - save_to_file_name : str, default=None - Specifying this parameter will save the report.json file to the lakehouse attached to the notebook with the file name of this parameter. - - Returns - ------- - str - The report.json file for a given Power BI report. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - client = fabric.FabricRestClient() - - dfI = fabric.list_items(workspace = workspace, type = 'Report') - dfI_filt = dfI[(dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist in the '{workspace}' workspace.") - return - - itemId = dfI_filt['Id'].iloc[0] - response = client.post(f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition") - df_items = pd.json_normalize(response.json()['definition']['parts']) - df_items_filt = df_items[df_items['path'] == 'report.json'] - payload = df_items_filt['payload'].iloc[0] - - reportFile = base64.b64decode(payload).decode('utf-8') - reportJson = json.loads(reportFile) - - if save_to_file_name is not None: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"{red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook.") - return - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - folderPath = '/lakehouse/default/Files' - fileExt = '.json' - if not save_to_file_name.endswith(fileExt): - save_to_file_name = save_to_file_name + fileExt - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: - json.dump(reportJson, json_file, indent=4) - print(f"{green_dot} The report.json file for the '{report}' report has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n") - - return reportJson - -def report_dependency_tree(workspace: Optional[str] = None): - - """ - Prints a dependency between reports and semantic models. - - Parameters - ---------- - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspaceId = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspaceId) - - dfR = fabric.list_reports(workspace = workspace) - dfD = fabric.list_datasets(workspace = workspace) - dfR = pd.merge(dfR, dfD[['Dataset ID', 'Dataset Name']], left_on = 'Dataset Id', right_on = 'Dataset ID', how = 'left') - dfR.rename(columns={'Name': 'Report Name'}, inplace=True) - dfR = dfR[['Report Name', 'Dataset Name']] - - report_icon = '\U0001F4F6' - dataset_icon = '\U0001F9CA' - workspace_icon = '\U0001F465' - - node_dict = {} - rootNode = Node(workspace) - node_dict[workspace] = rootNode - rootNode.custom_property = workspace_icon + ' ' - - for i, r in dfR.iterrows(): - datasetName = r['Dataset Name'] - reportName = r['Report Name'] - parentNode = node_dict.get(datasetName) - if parentNode is None: - parentNode = Node(datasetName, parent = rootNode) - node_dict[datasetName] = parentNode - parentNode.custom_property = dataset_icon + ' ' - - child_node = Node(reportName, parent=parentNode) - child_node.custom_property = report_icon + ' ' - - # Print the tree structure - for pre, _, node in RenderTree(node_dict[workspace]): - print(f"{pre}{node.custom_property}'{node.name}'") - -@log -def export_report(report: str, export_format: str, file_name: Optional[str] = None, bookmark_name: Optional[str] = None, page_name: Optional[str] = None, visual_name: Optional[str] = None, report_filter: Optional[str] = None, workspace: Optional[str] = None): - - """ - Exports a Power BI report to a file in your lakehouse. - - Parameters - ---------- - report : str - Name of the Power BI report. - export_format : str - The format in which to export the report. See this link for valid formats: https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group#fileformat. For image formats, enter the file extension in this parameter, not 'IMAGE'. - file_name : str, default=None - The name of the file to be saved within the lakehouse. Do not include the file extension. Defaults ot the reportName parameter value. - bookmark_name : str, default=None - The name (GUID) of a bookmark within the report. - page_name : str, default=None - The name (GUID) of the report page. - visual_name : str, default=None - The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter. - report_filter : str, default=None - A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - #https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group - - lakeAttach = lakehouse_attached() - - if lakeAttach == False: - print(f"{red_dot} In order to run the 'export_report' function, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if isinstance(page_name,str): - page_name = [page_name] - if isinstance(visual_name,str): - visual_name = [visual_name] - - if bookmark_name is not None and (page_name is not None or visual_name is not None): - print(f"{red_dot} If the 'bookmark_name' parameter is set, the 'page_name' and 'visual_name' parameters must not be set.") - return - if visual_name is not None and page_name is None: - print(f"{red_dot} If the 'visual_name' parameter is set, the 'page_name' parameter must be set.") - return - - validFormats = { - 'ACCESSIBLEPDF': '.pdf', - 'CSV': '.csv', - 'DOCX': '.docx', - 'MHTML': '.mhtml', - 'PDF': '.pdf', - 'PNG': '.png', - 'PPTX': '.pptx', - 'XLSX': '.xlsx', - 'XML': '.xml', - 'BMP': '.bmp', - 'EMF': '.emf', - 'GIF': '.gif', - 'JPEG': '.jpeg', - 'TIFF': '.tiff' - } - - export_format = export_format.upper() - if export_format not in validFormats: - print(f"{red_dot} The '{export_format}' format is not a valid format for exporting Power BI reports. Please enter a valid format. Options: {validFormats}") - return - - fileExt = validFormats.get(export_format) - - if file_name == None: - file_name = report + fileExt - else: - file_name = file_name + fileExt - - folderPath = '/lakehouse/default/Files' - filePath = os.path.join(folderPath, file_name) - - dfI = fabric.list_items(workspace = workspace) - dfI_filt = dfI[(dfI['Type'].isin(['Report', 'PaginatedReport'])) & (dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist in the '{workspace}' workspace.") - return - - reportType = dfI_filt['Type'].iloc[0] - - # Limitations - pbiOnly = ['PNG'] - paginatedOnly = ['ACCESSIBLEPDF','CSV','DOCX', 'BMP', 'EMF', 'GIF', 'JPEG', 'TIFF', 'MHTML', 'XLSX', 'XML'] - - if reportType == 'Report' and export_format in paginatedOnly: - print(f"{red_dot} The '{export_format}' format is only supported for paginated reports.") - return - if reportType == 'PaginatedReport' and export_format in pbiOnly: - print(f"{red_dot} The '{export_format}' format is only supported for Power BI reports.") - return - - if reportType == 'PaginatedReport' and (bookmark_name is not None or page_name is not None or visual_name is not None): - print(f"{red_dot} Export for paginated reports does not support bookmarks/pages/visuals. 
Those parameters must not be set for paginated reports.") - return - - reportId = dfI_filt['Id'].iloc[0] - client = fabric.PowerBIRestClient() - - dfVisual = list_report_visuals(report = report, workspace = workspace) - dfPage = list_report_pages(report = report, workspace = workspace) - - if export_format in ['BMP', 'EMF', 'GIF', 'JPEG', 'TIFF'] and reportType == 'PaginatedReport': - request_body = { - 'format': 'IMAGE', - 'paginatedReportConfiguration': { - 'formatSettings': { - 'OutputFormat': export_format.lower() - } - } - } - elif bookmark_name is None and page_name is None and visual_name is None: - request_body = { - 'format': export_format - } - elif bookmark_name is not None: - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - 'defaultBookmark': { - 'name': bookmark_name - } - } - } - elif page_name is not None and visual_name is None: - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - } - } - - request_body['powerBIReportConfiguration']['pages'] = [] - - for page in page_name: - dfPage_filt = dfPage[dfPage['Page ID'] == page] - if len(dfPage_filt) == 0: - print(f"{red_dot} The '{page}' page does not exist in the '{report}' report within the '{workspace}' workspace.") - return - page_dict = {'pageName': page} - request_body['powerBIReportConfiguration']['pages'].append(page_dict) - - elif page_name is not None and visual_name is not None: - if len(page_name) != len(visual_name): - print(f"{red_dot} Each 'visual_name' must map to a single 'page_name'.") - return - if reportType == 'Report': - request_body = { - 'format': export_format, - 'powerBIReportConfiguration': { - } - } - - request_body['powerBIReportConfiguration']['pages'] = [] - a=0 - for page in page_name: - visual = visual_name[a] - dfVisual_filt = dfVisual[(dfVisual['Page ID'] == page) & (dfVisual['Visual ID'] == visual)] - if len(dfVisual_filt) == 0: - print(f"{red_dot} The '{visual}' visual does not exist on the '{page}' in the '{report}' report within the '{workspace}' workspace.") - return - page_dict = {'pageName': page,'visualName': visual} - request_body['powerBIReportConfiguration']['pages'].append(page_dict) - a+=1 - - # Transform and add report filter if it is specified - if report_filter is not None and reportType == 'Report': - reportFilter = generate_embedded_filter(filter = report_filter) - report_level_filter = {'filter': reportFilter} - - if 'powerBIReportConfiguration' not in request_body: - request_body['powerBIReportConfiguration'] = {} - request_body['powerBIReportConfiguration']['reportLevelFilters'] = [report_level_filter] - print(request_body) - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/ExportTo",json=request_body) - if response.status_code == 202: - response_body = json.loads(response.content) - exportId = response_body['id'] - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}") - response_body = json.loads(response.content) - while response_body['status'] not in ['Succeeded', 'Failed']: - time.sleep(3) - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}") - response_body = json.loads(response.content) - if response_body['status'] == 'Failed': - print(f"{red_dot} The export for the '{report}' report within the '{workspace}' workspace in the '{export_format}' format has failed.") - else: - response = 
client.get(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}/file") - print(f"{in_progress} Saving the '{export_format}' export for the '{report}' report within the '{workspace}' workspace to the lakehouse...") - with open(filePath, "wb") as export_file: - export_file.write(response.content) - print(f"{green_dot} The '{export_format}' export for the '{report}' report within the '{workspace}' workspace has been saved to the following location: '{filePath}'.") - - -def clone_report(report: str, cloned_report: str, workspace: Optional[str] = None, target_workspace: Optional[str] = None, target_dataset: Optional[str] = None): - - """ - Clones a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - cloned_report : str - Name of the new Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - target_workspace : str, default=None - The name of the Fabric workspace to place the cloned report. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - target_dataset : str, default=None - The name of the semantic model to be used by the cloned report. - Defaults to None which resolves to the semantic model used by the initial report. - - Returns - ------- - - """ - - #https://learn.microsoft.com/rest/api/power-bi/reports/clone-report-in-group - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - dfI = fabric.list_items(workspace = workspace, type = 'Report') - dfI_filt = dfI[(dfI['Display Name'] == report)] - - if len(dfI_filt) == 0: - print(f"{red_dot} The '{report}' report does not exist within the '{workspace}' workspace.") - return - - reportId = resolve_report_id(report, workspace) - - if target_workspace is None: - target_workspace = workspace - target_workspace_id = workspace_id - else: - dfW = fabric.list_workspaces() - dfW_filt = dfW[dfW['Name'] == target_workspace] - - if len(dfW_filt) == 0: - print(f"{red_dot} The '{workspace}' is not a valid workspace.") - return - target_workspace_id = dfW_filt['Id'].iloc[0] - - if target_dataset == None: - dfR = fabric.list_reports(workspace = target_workspace) - dfR_filt = dfR[dfR['Name'] == report] - target_dataset_id = dfR_filt['Dataset Id'].iloc[0] - target_dataset = resolve_dataset_name(dataset_id = target_dataset_id, workspace = target_workspace) - else: - dfD = fabric.list_datasets(workspace = target_workspace) - dfD_filt = dfD[dfD['Dataset Name'] == target_dataset] - - if len(dfD_filt) == 0: - print(f"{red_dot} The '{target_dataset}' target dataset does not exist in the '{target_workspace}' workspace.") - return - target_dataset_id = dfD_filt['Dataset Id'].iloc[0] - - client = fabric.PowerBIRestClient() - - if target_workspace is None and target_dataset is None: - request_body = { - "name": cloned_report - } - elif target_workspace is not None and target_dataset is None: - request_body = { - "name": cloned_report, - "targetWorkspaceId": target_workspace_id - } - elif target_workspace is not None and target_dataset is not None: - request_body = { - "name": cloned_report, - "targetModelId": target_dataset_id, - "targetWorkspaceId": target_workspace_id - } - elif target_workspace is 
None and target_dataset is not None: - request_body = { - "name": cloned_report, - "targetModelId": target_dataset_id - } - - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/Clone",json=request_body) - - if response.status_code == 200: - print(f"{green_dot} The '{report}' report has been successfully cloned as the '{cloned_report}' report within the '{target_workspace}' workspace using the '{target_dataset}' semantic model.") - else: - print(f"{red_dot} POST request failed with status code: {response.status_code}") - -def launch_report(report: str, workspace: Optional[str] = None): - - """ - Shows a Power BI report within a Fabric notebook. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - str - An embedded Power BI report within the notebook. - """ - - from .HelperFunctions import resolve_report_id - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - reportId = resolve_report_id(report, workspace) - - report = Report(group_id=workspace_id, report_id=reportId) - - return report - -def list_report_pages(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all pages within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the pages within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - df = pd.DataFrame(columns=['Page ID', 'Page Name', 'Hidden', 'Width', 'Height', 'Visual Count']) - - reportJson = get_report_json(report = report, workspace = workspace) - - for section in reportJson['sections']: - pageID = section['name'] - pageName = section['displayName'] - #pageFilters = section['filters'] - pageWidth = section['width'] - pageHeight = section['height'] - visualCount = len(section['visualContainers']) - pageHidden = False - pageConfig = section['config'] - pageConfigJson = json.loads(pageConfig) - - try: - pageH = pageConfigJson['visibility'] - if pageH == 1: - pageHidden = True - except: - pass - - new_data = {'Page ID': pageID, 'Page Name': pageName, 'Hidden': pageHidden, 'Width': pageWidth, 'Height': pageHeight, 'Visual Count': visualCount} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Hidden'] = df['Hidden'].astype(bool) - intCol = ['Width', 'Height', 'Visual Count'] - df[intCol] = df[intCol].astype(int) - - return df - -def list_report_visuals(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all visuals within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the visuals within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - reportJson = get_report_json(report = report, workspace = workspace) - - df = pd.DataFrame(columns=['Page Name', 'Page ID', 'Visual ID', 'Title']) - - for section in reportJson['sections']: - pageID = section['name'] - pageName = section['displayName'] - - for visual in section['visualContainers']: - visualConfig = visual['config'] - visualConfigJson = json.loads(visualConfig) - visualID = visualConfigJson['name'] - - try: - title = visualConfigJson["singleVisual"]["vcObjects"]["title"][0]["properties"]["text"]["expr"]["Literal"]["Value"] - title = title[1:-1] - except: - title = '' - - new_data = {'Page Name': pageName, 'Page ID': pageID, 'Visual ID': visualID, 'Title': title} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - -def list_report_bookmarks(report: str, workspace: Optional[str] = None): - - """ - Shows the properties of all bookmarks within a Power BI report. - - Parameters - ---------- - report : str - Name of the Power BI report. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the bookmarks within a Power BI report and their properties. - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - df = pd.DataFrame(columns=['Bookmark ID', 'Bookmark Name', 'Page ID', 'Visual ID', 'Visual Hidden']) - - reportJson = get_report_json(report = report, workspace = workspace) - reportConfig = reportJson['config'] - reportConfigJson = json.loads(reportConfig) - - try: - for bookmark in reportConfigJson['bookmarks']: - bID = bookmark['name'] - bName = bookmark['displayName'] - rptPageId = bookmark['explorationState']['activeSection'] - - for rptPg in bookmark['explorationState']['sections']: - for vc in bookmark['explorationState']['sections'][rptPg]['visualContainers']: - vHidden = False - try: - hidden = bookmark['explorationState']['sections'][rptPg]['visualContainers'][vc]['singleVisual']['display']['mode'] - if hidden == 'hidden': - vHidden = True - except: - pass - - new_data = {'Bookmark ID': bID, 'Bookmark Name': bName, 'Page ID': rptPageId, 'Visual ID': vc, 'Visual Hidden': vHidden } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - listPages = list_report_pages(report = report, workspace = workspace) - - df = pd.merge(df, listPages[['Page ID', 'Page Name']], on='Page ID', how='left') - df = df[['Bookmark ID', 'Bookmark Name', 'Page ID', 'Page Name', 'Visual ID', 'Visual Hidden']] - - return df - - except: - print(f"The '{report}' report within the '{workspace}' workspace has no bookmarks.") - -def translate_report_titles(report: str, languages: Union[str,List[str]], workspace: Optional[str] = None): - - """ - Dynamically generates new Power BI reports which have report titles translated into the specified language(s). - - Parameters - ---------- - report : str - Name of the Power BI report. - languages : str, List[str] - The language code(s) in which to translate the report titles. 
- workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if isinstance(languages, str): - languages = [languages] - - for lang in languages: - language_validate(lang) - - reportJson = get_report_json(report = report, workspace = workspace) - dfV = list_report_visuals(report = report, workspace = workspace) - spark = SparkSession.builder.getOrCreate() - df = spark.createDataFrame(dfV) - columnToTranslate = 'Title' - - translate = ( - Translate() - .setTextCol(columnToTranslate) - .setToLanguage(languages) - .setOutputCol("translation") - .setConcurrency(5) - ) - - transDF = (translate - .transform(df) - .withColumn("translation", flatten(col("translation.translations"))) - .withColumn("translation", col("translation.text")) - .select('Visual ID', columnToTranslate, 'translation')) - - df_panda = transDF.toPandas() - - i=0 - for lang in languages: - #Clone report - language = language_validate(lang) - clonedReportName = f"{report}_{language}" - - dfRep = fabric.list_reports(workspace = workspace) - dfRep_filt = dfRep[(dfRep['Name'] == clonedReportName) & (dfRep['Report Type'] == 'PowerBIReport')] - - if len(dfRep_filt) > 0: - print(f"{yellow_dot} The '{clonedReportName}' report already exists in the '{workspace} workspace.") - else: - clone_report(report = report, cloned_report = clonedReportName, workspace = workspace) - print(f"{green_dot} The '{clonedReportName}' report has been created via clone in the '{workspace} workspace.") - - rptJsonTr = copy.deepcopy(reportJson) - - # Update report json file - for section in rptJsonTr['sections']: - for visual in section['visualContainers']: - visualConfig = visual['config'] - visualConfigJson = json.loads(visualConfig) - visualID = visualConfigJson['name'] - - df_filt = df_panda[(df_panda['Visual ID'] == visualID) & (df_panda['Title'] != '')] - - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - if len(tr) > 0: - prop = visualConfigJson["singleVisual"]["vcObjects"]["title"][0]["properties"]["text"]["expr"]["Literal"] - prop['Value'] = f"'{tr}'" - - visual['config'] = json.dumps(visualConfigJson) - - i+=1 - - # Post updated report json file to cloned report - update_report_from_reportjson(report = clonedReportName, report_json = rptJsonTr, workspace = workspace) - print(f"{green_dot} The visual titles within the '{clonedReportName}' report within the '{workspace}' have been translated into '{language}' accordingly.") - - - - - - \ No newline at end of file diff --git a/sempy_labs/ShowUnsupportedDirectLakeObjects.py b/sempy_labs/ShowUnsupportedDirectLakeObjects.py deleted file mode 100644 index 0f4277a0..00000000 --- a/sempy_labs/ShowUnsupportedDirectLakeObjects.py +++ /dev/null @@ -1,68 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from .ListFunctions import list_tables -from .HelperFunctions import format_dax_object_name -from typing import List, Optional, Union - -def show_unsupported_direct_lake_objects(dataset: str, workspace: Optional[str] = None): - - """ - Returns a list of a semantic model's objects which are not supported by Direct Lake based on [official documentation](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations). - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. 
- Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame, pandas.DataFrame, pandas.DataFrame - 3 pandas dataframes showing objects in a semantic model which are not supported by Direct Lake. - """ - - pd.options.mode.chained_assignment = None - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - dfT = list_tables(dataset, workspace) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - - # Calc tables - dfT_filt = dfT[dfT['Type'] == 'Calculated Table'] - dfT_filt.rename(columns={'Name': 'Table Name'}, inplace=True) - t = dfT_filt[['Table Name', 'Type']] - - # Calc columns - dfC_filt = dfC[(dfC['Type'] == 'Calculated') | (dfC['Data Type'] == 'Binary')] - c = dfC_filt[['Table Name', 'Column Name', 'Type', 'Data Type', 'Source']] - - # Relationships - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - merged_from = pd.merge(dfR, dfC, left_on='From Object', right_on='Column Object', how='left') - merged_to = pd.merge(dfR, dfC, left_on='To Object', right_on='Column Object', how='left') - - dfR['From Column Data Type'] = merged_from['Data Type'] - dfR['To Column Data Type'] = merged_to['Data Type'] - - dfR_filt = dfR[((dfR['From Column Data Type'] == 'DateTime') | (dfR['To Column Data Type'] == 'DateTime')) | (dfR['From Column Data Type'] != dfR['To Column Data Type'])] - r = dfR_filt[['From Table', 'From Column', 'To Table', 'To Column', 'From Column Data Type', 'To Column Data Type']] - - #print('Calculated Tables are not supported...') - #display(t) - #print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations") - #print('Calculated columns are not supported. Columns of binary data type are not supported.') - #display(c) - #print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.') - #display(r) - - return t, c, r \ No newline at end of file diff --git a/sempy_labs/TOM.py b/sempy_labs/TOM.py deleted file mode 100644 index d5a18fff..00000000 --- a/sempy_labs/TOM.py +++ /dev/null @@ -1,3251 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -import re -from datetime import datetime -from .HelperFunctions import format_dax_object_name -from .ListFunctions import list_relationships -from .RefreshSemanticModel import refresh_semantic_model -from .Fallback import check_fallback_reason -from contextlib import contextmanager -from typing import List, Optional, Union, TYPE_CHECKING -from sempy._utils._log import log - -if TYPE_CHECKING: - import Microsoft.AnalysisServices.Tabular - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' -checked = '\u2611' -unchecked = '\u2610' -start_bold = '\033[1m' -end_bold = '\033[0m' - -@log -@contextmanager -def connect_semantic_model(dataset: str, readonly: Optional[bool] = True, workspace: Optional[str] = None): - - """ - Connects to the Tabular Object Model (TOM) within a semantic model. 
- - Parameters - ---------- - dataset : str - Name of the semantic model. - readonly: bool, default=True - Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - str - A connection to the semantic model's Tabular Object Model. - """ - - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - import System - - if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - fpAdded = [] - - class TOMWrapper: - - def __init__(self, dataset, workspace, readonly): - - tom_server = fabric.create_tom_server(readonly=readonly, workspace=workspace) - self.model = tom_server.Databases.GetByName(dataset).Model - - def all_columns(self): - - """ - Outputs a list of all columns within all tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Column] - All columns within the semantic model. - """ - - for t in self.model.Tables: - for c in t.Columns: - if c.Type != TOM.ColumnType.RowNumber: - yield c - - def all_calculated_columns(self): - - """ - Outputs a list of all calculated columns within all tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Column] - All calculated columns within the semantic model. - """ - - for t in self.model.Tables: - for c in t.Columns: - if c.Type == TOM.ColumnType.Calculated: - yield c - - def all_calculated_tables(self): - - """ - Outputs a list of all calculated tables in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Table] - All calculated tables within the semantic model. - """ - - for t in self.model.Tables: - if any(p.SourceType == TOM.ColumnType.Calculated for p in t.Partitions): - yield t - - def all_calculation_groups(self): - - """ - Outputs a list of all calculation groups in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Table] - All calculation groups within the semantic model. - """ - - for t in self.model.Tables: - if t.CalculationGroup is not None: - yield t - - def all_measures(self): - - """ - Outputs a list of all measures in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Measure] - All measures within the semantic model. - """ - - for t in self.model.Tables: - for m in t.Measures: - yield m - - def all_partitions(self): - - """ - Outputs a list of all partitions in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Partition] - All partitions within the semantic model. - """ - - for t in self.model.Tables: - for p in t.Partitions: - yield p - - def all_hierarchies(self): - - """ - Outputs a list of all hierarchies in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Hierarchy] - All hierarchies within the semantic model. 
- """ - - for t in self.model.Tables: - for h in t.Hierarchies: - yield h - - def all_levels(self): - - """ - Outputs a list of all levels in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.Level] - All levels within the semantic model. - """ - - for t in self.model.Tables: - for h in t.Hierarchies: - for l in h.Levels: - yield l - - def all_calculation_items(self): - - """ - Outputs a list of all calculation items in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.CalculationItem] - All calculation items within the semantic model. - """ - - for t in self.model.Tables: - if t.CalculationGroup is not None: - for ci in t.CalculationGroup.CalculationItems: - yield ci - - def all_rls(self): - - """ - Outputs a list of all row level security expressions in the semantic model. - - Parameters - ---------- - - Returns - ------- - Iterator[Microsoft.AnalysisServices.Tabular.TablePermission] - All row level security expressions within the semantic model. - """ - - for r in self.model.Roles: - for tp in r.TablePermissions: - yield tp - - def add_measure(self, table_name: str, measure_name: str, expression: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None): - - """ - Adds a measure to the semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the measure will be created. - measure_name : str - Name of the measure. - expression : str - DAX expression of the measure. - format_string : str, default=None - Format string of the measure. - hidden : bool, default=False - Whether the measure will be hidden or visible. - description : str, default=None - A description of the measure. - display_folder : str, default=None - The display folder in which the measure will reside. - - Returns - ------- - - """ - - obj = TOM.Measure() - obj.Name= measure_name - obj.Expression = expression - obj.IsHidden = hidden - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - - self.model.Tables[table_name].Measures.Add(obj) - - def add_calculated_table_column(self, table_name: str, column_name: str, source_column: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - - """ - Adds a calculated table column to a calculated table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - source_column : str - The source column for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. - data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. 
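# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# add_measure above builds a TOM.Measure from a table name, measure name and DAX expression.
# A hedged example follows; the 'Sales' table, the measure name and the import path are
# assumptions. Note that readonly=False is needed for the change to be saved to the server.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.add_measure(
        table_name='Sales',
        measure_name='Total Quantity',
        expression="SUM(Sales[Quantity])",
        format_string='#,0',             # optional
        display_folder='Base Measures',  # optional
    )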
- summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. - - Returns - ------- - - """ - - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') - if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') - - obj = TOM.CalculatedTableColumn() - obj.Name = column_name - obj.SourceColumn = source_column - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsHidden = hidden - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_data_column(self, table_name: str, column_name: str, source_column: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - - """ - Adds a data column to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - source_column : str - The source column for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. - data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. - summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. 
- - Returns - ------- - - """ - - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') - if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') - - obj = TOM.DataColumn() - obj.Name = column_name - obj.SourceColumn = source_column - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsHidden = hidden - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_calculated_column(self, table_name: str, column_name: str, expression: str, data_type: str, format_string: Optional[str] = None, hidden: Optional[bool] = False, description: Optional[str] = None, display_folder: Optional[str] = None, data_category: Optional[str] = None, key: Optional[bool] = False, summarize_by: Optional[str] = None): - - """ - Adds a calculated column to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the column will be created. - column_name : str - Name of the column. - expression : str - The DAX expression for the column. - data_type : str - The data type of the column. - format_string : str, default=None - Format string of the column. - hidden : bool, default=False - Whether the column will be hidden or visible. - description : str, default=None - A description of the column. - display_folder : str, default=None - The display folder in which the column will reside. - data_category : str, default=None - The data category of the column. - key : bool, default=False - Marks the column as the primary key of the table. - summarize_by : str, default=None - Sets the value for the Summarize By property of the column. - Defaults to None resolves to 'Default'. - - Returns - ------- - - """ - - data_type = data_type.capitalize().replace('Integer', 'Int64').replace('Datetime', 'DateTime') - if summarize_by is None: - summarize_by = 'Default' - summarize_by = summarize_by.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') - - obj = TOM.CalculatedColumn() - obj.Name = column_name - obj.Expression = expression - obj.IsHidden = hidden - obj.DataType = System.Enum.Parse(TOM.DataType, data_type) - obj.IsKey = key - obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) - if format_string is not None: - obj.FormatString = format_string - if description is not None: - obj.Description = description - if display_folder is not None: - obj.DisplayFolder = display_folder - if data_category is not None: - obj.DataCategory = data_category - self.model.Tables[table_name].Columns.Add(obj) - - def add_calculation_item(self, table_name: str, calculation_item_name: str, expression: str, ordinal: Optional[int] = None, format_string_expression: Optional[str] = None, description: Optional[str] = None): - - """ - Adds a calculation item to a calculation group within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table in which the calculation item will be created. - calculation_item_name : str - Name of the calculation item. - expression : str - The DAX expression for the calculation item. 
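# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# add_data_column and add_calculated_column above accept friendly data_type values such as
# 'Integer' or 'DateTime', which are normalized before being parsed into a TOM.DataType.
# The table, column names and expressions below are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    # A physical column backed by a source column from the partition query.
    tom.add_data_column(table_name='Sales', column_name='Order Quantity',
                        source_column='OrderQty', data_type='Integer')
    # A DAX-based calculated column.
    tom.add_calculated_column(table_name='Sales', column_name='Margin',
                              expression="Sales[SalesAmount] - Sales[TotalProductCost]",
                              data_type='Double')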
- ordinal : int, default=None - The ordinal of the calculation item. - format_string_expression : str, default=None - The format string expression for the calculation item. - description : str, default=None - A description of the calculation item. - - Returns - ------- - - """ - - obj = TOM.CalculationItem() - fsd = TOM.FormatStringDefinition() - obj.Name = calculation_item_name - obj.Expression = expression - if ordinal is not None: - obj.Ordinal = ordinal - if description is not None: - obj.Description = description - if format_string_expression is not None: - obj.FormatStringDefinition = fsd.Expression = format_string_expression - self.model.Tables[table_name].CalculationGroup.CalculationItems.Add(obj) - - def add_role(self, role_name: str, model_permission: Optional[str] = None, description: Optional[str] = None): - - """ - Adds a role to a semantic model. - - Parameters - ---------- - role_name : str - Name of the role. - model_permission : str, default=None - The model permission for the role. - Defaults to None which resolves to 'Read'. - description : str, default=None - A description of the role. - - Returns - ------- - - """ - - if model_permission is None: - model_permission = 'Read' - - obj = TOM.ModelRole() - obj.Name = role_name - obj.ModelPermission = System.Enum.Parse(TOM.ModelPermission, model_permission) - if description is not None: - obj.Description = description - self.model.Roles.Add(obj) - - def set_rls(self, role_name: str, table_name: str, filter_expression: str): - - """ - Sets the row level security permissions for a table within a role. - - Parameters - ---------- - role_name : str - Name of the role. - table_name : str - Name of the table. - filter_expression : str - The DAX expression containing the row level security filter expression logic. - - Returns - ------- - - """ - - tp = TOM.TablePermission() - tp.Table = self.model.Tables[table_name] - tp.FilterExpression = filter_expression - - try: - self.model.Roles[role_name].TablePermissions[table_name].FilterExpression = filter_expression - except: - self.model.Roles[role_name].TablePermissions.Add(tp) - - def set_ols(self, role_name: str, table_name: str, column_name: str, permission: str): - - """ - Sets the object level security permissions for a column within a role. - - Parameters - ---------- - role_name : str - Name of the role. - table_name : str - Name of the table. - column_name : str - Name of the column. - permission : str - The object level security permission for the column. - - Returns - ------- - - """ - - permission = permission.capitalize() - - if permission not in ['Read', 'None', 'Default']: - print(f"ERROR! Invalid 'permission' value.") - return - - cp = TOM.ColumnPermission() - cp.Column = self.model.Tables[table_name].Columns[column_name] - cp.MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) - try: - self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions[column_name].MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) - except: - self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions.Add(cp) - - def add_hierarchy(self, table_name: str, hierarchy_name: str, columns: List[str], levels: Optional[List[str]] = None, hierarchy_description: Optional[str] = None, hierarchy_hidden: Optional[bool] = False): - - """ - Adds a hierarchy to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - hierarchy_name : str - Name of the hierarchy. 
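# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# add_role, set_rls and set_ols above can be chained to create a role, row-filter a table
# for that role and hide a column from it. The role, table, column and filter expression
# below are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.add_role(role_name='West Region', model_permission='Read')
    tom.set_rls(role_name='West Region', table_name='Sales',
                filter_expression="Sales[Region] = \"West\"")
    tom.set_ols(role_name='West Region', table_name='Sales',
                column_name='Cost', permission='None')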
- columns : List[str] - Names of the columns to use within the hierarchy. - levels : List[str], default=None - Names of the levels to use within the hierarhcy (instead of the column names). - hierarchy_description : str, default=None - A description of the hierarchy. - hierarchy_hidden : bool, default=False - Whether the hierarchy is visible or hidden. - - Returns - ------- - - """ - - if isinstance(columns, str): - print(f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']") - return - if len(columns) == 1: - print(f"There must be at least 2 levels in order to create a hierarchy.") - return - - if levels is None: - levels = columns - - if len(columns) != len(levels): - print(f"If specifying level names, you must specify a level for each column.") - return - - obj = TOM.Hierarchy() - obj.Name = hierarchy_name - obj.IsHidden = hierarchy_hidden - if hierarchy_description is not None: - obj.Description = hierarchy_description - self.model.Tables[table_name].Hierarchies.Add(obj) - - for col in columns: - lvl = TOM.Level() - lvl.Column = self.model.Tables[table_name].Columns[col] - lvl.Name = levels[columns.index(col)] - lvl.Ordinal = columns.index(col) - self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add(lvl) - - def add_relationship(self, from_table: str, from_column: str, to_table: str, to_column: str, from_cardinality: str, to_cardinality: str, cross_filtering_behavior: Optional[str] = None, is_active: Optional[bool] = True, security_filtering_behavior: Optional[str] = None, rely_on_referential_integrity: Optional[bool] = False): - - """ - Adds a relationship to a semantic model. - - Parameters - ---------- - from_table : str - Name of the table on the 'from' side of the relationship. - from_column : str - Name of the column on the 'from' side of the relationship. - to_table : str - Name of the table on the 'to' side of the relationship. - to_column : str - Name of the column on the 'to' side of the relationship. - from_cardinality : str - The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. - to_cardinality : str - The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. - cross_filtering_behavior : str, default=None - Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). - Defaults to None which resolves to 'Automatic'. - is_active : bool, default=True - Setting for whether the relationship is active or not. - security_filtering_behavior : str, default=None - Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). - Defaults to None which resolves to 'OneDirection'. - rely_on_referential_integrity : bool, default=False - Setting for the rely on referential integrity of the relationship. 
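# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# add_hierarchy above requires at least two columns; the optional levels list renames the
# levels. The 'Geography' table and its columns are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.add_hierarchy(
        table_name='Geography',
        hierarchy_name='Geo Hierarchy',
        columns=['Continent', 'Country', 'City'],  # ordered top level -> leaf level
    )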
- - Returns - ------- - - """ - - if cross_filtering_behavior is None: - cross_filtering_behavior = 'Automatic' - if security_filtering_behavior is None: - security_filtering_behavior = 'OneDirection' - - from_cardinality = from_cardinality.capitalize() - to_cardinality = to_cardinality.capitalize() - cross_filtering_behavior = cross_filtering_behavior.capitalize() - security_filtering_behavior = security_filtering_behavior.capitalize() - security_filtering_behavior = security_filtering_behavior.replace('direct', 'Direct') - cross_filtering_behavior = cross_filtering_behavior.replace('direct', 'Direct') - - rel = TOM.SingleColumnRelationship() - rel.FromColumn = self.model.Tables[from_table].Columns[from_column] - rel.FromCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, from_cardinality) - rel.ToColumn = self.model.Tables[to_table].Columns[to_column] - rel.ToCardinality = System.Enum.Parse(TOM.RelationshipEndCardinality, to_cardinality) - rel.IsActive = is_active - rel.CrossFilteringBehavior = System.Enum.Parse(TOM.CrossFilteringBehavior, cross_filtering_behavior) - rel.SecurityFilteringBehavior = System.Enum.Parse(TOM.SecurityFilteringBehavior, security_filtering_behavior) - rel.RelyOnReferentialIntegrity = rely_on_referential_integrity - - self.model.Relationships.Add(rel) - - def add_calculation_group(self, name: str, precedence: int, description: Optional[str] = None, hidden: Optional[bool] = False): - - """ - Adds a calculation group to a semantic model. - - Parameters - ---------- - name : str - Name of the calculation group. - precedence : int - The precedence of the calculation group. - description : str, default=None - A description of the calculation group. - hidden : bool, default=False - Whether the calculation group is hidden/visible. - - Returns - ------- - - """ - - tbl = TOM.Table() - tbl.Name = name - tbl.CalculationGroup = TOM.CalculationGroup() - tbl.CalculationGroup.Precedence = precedence - tbl.IsHidden = hidden - if description is not None: - tbl.Description = description - - part = TOM.Partition() - part.Name = name - part.Source = TOM.CalculationGroupSource() - tbl.Partitions.Add(part) - - sortCol = 'Ordinal' - - col1 = TOM.DataColumn() - col1.Name = sortCol - col1.SourceColumn = sortCol - col1.IsHidden = True - col1.DataType = System.Enum.Parse(TOM.DataType, 'Int64') - - tbl.Columns.Add(col1) - - col2 = TOM.DataColumn() - col2.Name = 'Name' - col2.SourceColumn = 'Name' - col2.DataType = System.Enum.Parse(TOM.DataType, 'String') - #col.SortByColumn = m.Tables[name].Columns[sortCol] - tbl.Columns.Add(col2) - - self.model.DiscourageImplicitMeasures = True - self.model.Tables.Add(tbl) - - def add_expression(self, name: str, expression: str, description: Optional[str] = None): - - """ - Adds an expression to a semantic model. - - Parameters - ---------- - name : str - Name of the expression. - expression: str - The M expression of the expression. - description : str, default=None - A description of the expression. - - Returns - ------- - - """ - - exp = TOM.NamedExpression() - exp.Name = name - if description is not None: - exp.Description = description - exp.Kind = TOM.ExpressionKind.M - exp.Expression = expression - - self.model.Expressions.Add(exp) - - def add_translation(self, language: str): - - """ - Adds a translation language (culture) to a semantic model. - - Parameters - ---------- - language : str - The language code (i.e. 'it-IT' for Italian). 
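# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# add_relationship above creates a single-column relationship; cardinalities are passed as
# strings ('Many', 'One', 'None'). Table and column names are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.add_relationship(
        from_table='Sales', from_column='ProductKey',
        to_table='Product', to_column='ProductKey',
        from_cardinality='Many', to_cardinality='One',
        is_active=True,
    )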
- - Returns - ------- - - """ - - cul = TOM.Culture() - cul.Name = language - - try: - self.model.Cultures.Add(cul) - except: - pass - - def add_perspective(self, perspective_name: str): - - """ - Adds a perspective to a semantic model. - - Parameters - ---------- - perspective_name : str - Name of the perspective. - - Returns - ------- - - """ - - persp = TOM.Perspective() - persp.Name = perspective_name - self.model.Perspectives.Add(persp) - - def add_m_partition(self, table_name: str, partition_name: str, expression: str, mode: Optional[str] = None, description: Optional[str] = None): - - """ - Adds an M-partition to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - partition_name : str - Name of the partition. - expression : str - The M expression encapsulating the logic for the partition. - mode : str, default=None - The query mode for the partition. - Defaults to None which resolves to 'Import'. - description : str, default=None - A description for the partition. - - Returns - ------- - - """ - - mode = mode.title().replace('query', 'Query').replace(' ','').replace('lake', 'Lake') - - mp = TOM.MPartitionSource() - mp.Expression = expression - p = TOM.Partition() - p.Name = partition_name - p.Source = mp - if description is not None: - p.Description = description - if mode is None: - mode = 'Default' - p.Mode = System.Enum.Parse(TOM.ModeType, mode) - - self.model.Tables[table_name].Partitions.Add(p) - - def add_entity_partition(self, table_name: str, entity_name: str, expression: Optional[str] = None, description: Optional[str] = None): - - """ - Adds an entity partition to a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - entity_name : str - Name of the lakehouse table. - expression : TOM Object, default=None - The expression used by the table. - Defaults to None which resolves to the 'DatabaseQuery' expression. - description : str, default=None - A description for the partition. - - Returns - ------- - - """ - - ep = TOM.EntityPartitionSource() - ep.Name = table_name - ep.EntityName = entity_name - if expression is None: - ep.ExpressionSource = self.model.Expressions['DatabaseQuery'] - else: - ep.ExpressionSource = expression - p = TOM.Partition() - p.Name = table_name - p.Source = ep - p.Mode = TOM.ModeType.DirectLake - if description is not None: - p.Description = description - - self.model.Tables[table_name].Partitions.Add(p) - - def set_alternate_of(self, table_name: str, column_name: str, summarization_type: str, base_table: str, base_column: Optional[str] = None): - - """ - Sets the 'alternate of' property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - summarization_type : str - The summarization type for the column. - base_table : str - Name of the base table for aggregation. 
- base_column : str - Name of the base column for aggregation - - Returns - ------- - - """ - - if base_column is not None and base_table is None: - print(f"ERROR: If you specify the base table you must also specify the base column") - - summarization_type = summarization_type.replace(' ','').capitalize().replace('Groupby', 'GroupBy') - - summarizationTypes = ['Sum', 'GroupBy', 'Count', 'Min', 'Max'] - if summarization_type not in summarizationTypes: - print(f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}.") - return - - ao = TOM.AlternateOf() - ao.Summarization = System.Enum.Parse(TOM.SummarizationType, summarization_type) - if base_column is not None: - ao.BaseColumn = self.model.Tables[base_table].Columns[base_column] - else: - ao.BaseTable = self.model.Tables[base_table] - - self.model.Tables[table_name].Columns[column_name].AlternateOf = ao - - # Hide agg table and columns - t = self.model.Tables[table_name] - t.IsHidden = True - for c in t.Columns: - c.IsHidden = True - - def remove_alternate_of(self, table_name: str, column_name: str): - - """ - Removes the 'alternate of' property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - - Returns - ------- - - """ - - self.model.Tables[table_name].Columns[column_name].AlternateOf = None - - def get_annotations(self, object) -> 'Microsoft.AnalysisServices.Tabular.Annotation': - - """ - Shows all annotations for a given object within a semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.Annotation - TOM objects of all the annotations on a particular object within the semantic model. - """ - - #df = pd.DataFrame(columns=['Name', 'Value']) - - for a in object.Annotations: - #new_data = {'Name': a.Name, 'Value': a.Value} - yield a - #df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - def set_annotation(self, object, name: str, value: str): - - """ - Sets an annotation on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. - value : str - Value of the annotation. - - Returns - ------- - - """ - - ann = TOM.Annotation() - ann.Name = name - ann.Value = value - - try: - object.Annotations[name].Value = value - except: - object.Annotations.Add(ann) - - def get_annotation_value(self, object, name: str): - - """ - Obtains the annotation value for a given annotation on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. - - Returns - ------- - str - The annotation value. - """ - - return object.Annotations[name].Value - - def remove_annotation(self, object, name: str): - - """ - Removes an annotation on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. - - Returns - ------- - - """ - - object.Annotations.Remove(name) - - def clear_annotations(self, object): - - """ - Removes all annotations on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. 
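# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# The annotation helpers above operate on any TOM object. Setting and then reading an
# annotation on a table might look like this; the table name, annotation name and value
# are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    sales = tom.model.Tables['Sales']
    tom.set_annotation(object=sales, name='Owner', value='Finance Team')
    print(tom.get_annotation_value(object=sales, name='Owner'))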
- - Returns - ------- - - """ - - object.Annotations.Clear() - - def get_extended_properties(self, object) -> 'Microsoft.AnalysisServices.Tabular.ExtendedProperty': - - """ - Retrieves all extended properties on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.ExtendedPropertiesCollection - TOM Objects of all the extended properties. - """ - - #df = pd.DataFrame(columns=['Name', 'Value', 'Type']) - - for a in object.ExtendedProperties: - yield a - #new_data = {'Name': a.Name, 'Value': a.Value, 'Type': a.Type} - #df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - #return df - - def set_extended_property(self, object, extended_property_type: str, name: str, value: str): - - """ - Sets an extended property on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - extended_property_type : str - The extended property type. Options: 'Json', 'String'. - name : str - Name of the extended property. - value : str - Value of the extended property. - - Returns - ------- - - """ - - extended_property_type = extended_property_type.title() - - if extended_property_type == 'Json': - ep = TOM.JsonExtendedProperty() - else: - ep = TOM.StringExtendedProperty() - - ep.Name = name - ep.Value = value - - try: - object.ExtendedProperties[name].Value = value - except: - object.ExtendedProperties.Add(ep) - - def get_extended_property_value(self, object, name: str): - - """ - Retrieves the value of an extended property for an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. - - Returns - ------- - str - The extended property value. - """ - - return object.ExtendedProperties[name].Value - - def remove_extended_property(self, object, name: str): - - """ - Removes an extended property on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - name : str - Name of the annotation. - - Returns - ------- - - """ - - object.ExtendedProperties.Remove(name) - - def clear_extended_properties(self, object): - - """ - Removes all extended properties on an object within the semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - - Returns - ------- - - """ - - object.ExtendedProperties.Clear() - - def in_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - - """ - Indicates whether an object is contained within a given perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspecitve_name : str - Name of the perspective. - - Returns - ------- - bool - An indication as to whether the object is contained within the given perspective. 
- """ - - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] - objectType = object.ObjectType - - if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") - return - - object.Model.Perspectives[perspective_name] - - try: - if objectType == TOM.ObjectType.Table: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Name] - elif objectType == TOM.ObjectType.Column: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns[object.Name] - elif objectType == TOM.ObjectType.Measure: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures[object.Name] - elif objectType == TOM.ObjectType.Hierarchy: - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies[object.Name] - return True - except: - return False - - def add_to_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - - """ - Adds an object to a perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspective_name : str - Name of the perspective. - - Returns - ------- - - """ - - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] - objectType = object.ObjectType - - if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") - return - try: - object.Model.Perspectives[perspective_name] - except: - print(f"The '{perspective_name}' perspective does not exist.") - return - - #try: - if objectType == TOM.ObjectType.Table: - pt = TOM.PerspectiveTable() - pt.Table = object - object.Model.Perspectives[perspective_name].PerspectiveTables.Add(pt) - elif objectType == TOM.ObjectType.Column: - pc = TOM.PerspectiveColumn() - pc.Column = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns.Add(pc) - elif objectType == TOM.ObjectType.Measure: - pm = TOM.PerspectiveMeasure() - pm.Measure = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures.Add(pm) - elif objectType == TOM.ObjectType.Hierarchy: - ph = TOM.PerspectiveHierarchy() - ph.Hierarchy = object - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies.Add(ph) - #except: - # pass - - def remove_from_perspective(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], perspective_name: str): - - """ - Removes an object from a perspective. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - perspective_name : str - Name of the perspective. 
- - Returns - ------- - - """ - - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] - objectType = object.ObjectType - - if objectType not in validObjects: - print(f"Only the following object types are valid for perspectives: {validObjects}.") - return - try: - object.Model.Perspectives[perspective_name] - except: - print(f"The '{perspective_name}' perspective does not exist.") - return - - #try: - if objectType == TOM.ObjectType.Table: - pt = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables.Remove(pt) - elif objectType == TOM.ObjectType.Column: - pc = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveColumns.Remove(pc) - elif objectType == TOM.ObjectType.Measure: - pm = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveMeasures.Remove(pm) - elif objectType == TOM.ObjectType.Hierarchy: - ph = object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies[object.Name] - object.Model.Perspectives[perspective_name].PerspectiveTables[object.Parent.Name].PerspectiveHierarchies.Remove(ph) - #except: - # pass - - def set_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], language: str, property: str, value: str): - - """ - Sets a translation value for an object's property. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - language : str - The language code. - property : str - The property to set. Options: 'Name', 'Description', 'Display Folder'. - value : str - The transation value. - - Returns - ------- - - """ - - self.add_translation(language = language) - - property = property.title() - - validObjects = [TOM.ObjectType.Table, TOM.ObjectType.Column, TOM.ObjectType.Measure, TOM.ObjectType.Hierarchy] #, 'Level' - - if object.ObjectType not in validObjects: - print(f"Translations can only be set to {validObjects}.") - return - - mapping = { - 'Name': TOM.TranslatedProperty.Caption, - 'Description': TOM.TranslatedProperty.Description, - 'Display Folder': TOM.TranslatedProperty.DisplayFolder - } - - prop = mapping.get(property) - - try: - object.Model.Cultures[language] - except: - print(f"The '{language}' translation language does not exist in the semantic model.") - return - - object.Model.Cultures[language].ObjectTranslations.SetTranslation(object, prop, value) - - - def remove_translation(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure', 'TOM.Hierarchy'], language: str): - - """ - Removes an object's translation value. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. - language : str - The language code. - - Returns - ------- - - """ - - o = object.Model.Cultures[language].ObjectTranslations[object, TOM.TranslatedProperty.Caption] - object.Model.Cultures[language].ObjectTranslations.Remove(o) - - def remove_object(self, object): - - """ - Removes an object from a semantic model. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column/measure) within a semantic model. 
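# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# set_translation above adds the culture if needed and then translates a 'Name',
# 'Description' or 'Display Folder' property. The table name and the Italian caption
# are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.set_translation(object=tom.model.Tables['Sales'],
                        language='it-IT', property='Name', value='Vendite')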
- - Returns - ------- - - """ - - objType = object.ObjectType - - # Have to remove translations and perspectives on the object before removing it. - if objType in ['Table', 'Column', 'Measure', 'Hierarchy', 'Level']: - for lang in object.Model.Cultures: - try: - self.remove_translation(object = object, language = lang.Name) - except: - pass - if objType in ['Table', 'Column', 'Measure', 'Hierarchy']: - for persp in object.Model.Perspectives: - try: - self.remove_from_perspective(object = object, perspective_name = persp.Name) - except: - pass - - if objType == TOM.ObjectType.Column: - object.Parent.Columns.Remove(object.Name) - elif objType == TOM.ObjectType.Measure: - object.Parent.Measures.Remove(object.Name) - elif objType == TOM.ObjectType.Hierarchy: - object.Parent.Hierarchies.Remove(object.Name) - elif objType == TOM.ObjectType.Level: - object.Parent.Levels.Remove(object.Name) - elif objType == TOM.ObjectType.Partition: - object.Parent.Partitions.Remove(object.Name) - elif objType == TOM.ObjectType.Expression: - object.Parent.Expressions.Remove(object.Name) - elif objType == TOM.ObjectType.DataSource: - object.Parent.DataSources.Remove(object.Name) - elif objType == TOM.ObjectType.Role: - object.Parent.Roles.Remove(object.Name) - elif objType == TOM.ObjectType.Relationship: - object.Parent.Relationships.Remove(object.Name) - elif objType == TOM.ObjectType.Culture: - object.Parent.Cultures.Remove(object.Name) - elif objType == TOM.ObjectType.Perspective: - object.Parent.Perspectives.Remove(object.Name) - elif objType == TOM.ObjectType.CalculationItem: - object.Parent.CalculationItems.Remove(object.Name) - elif objType == TOM.ObjectType.TablePermission: - object.Parent.TablePermissions.Remove(object.Name) - - def used_in_relationships(self, object: Union['TOM.Table', 'TOM.Column']): - - """ - Shows all relationships in which a table/column is used. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.RelationshipCollection - All relationships in which the table/column is used. - """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Table: - for r in self.model.Relationships: - if r.FromTable.Name == object.Name or r.ToTable.Name == object.Name: - yield r#, 'Table' - elif objType == TOM.ObjectType.Column: - for r in self.model.Relationships: - if (r.FromTable.Name == object.Parent.Name and r.FromColumn.Name == object.Name) or \ - (r.ToTable.Name == object.Parent.Name and r.ToColumn.Name == object.Name): - yield r#, 'Column' - - def used_in_levels(self, column: 'TOM.Column'): - - """ - Shows all levels in which a column is used. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.LevelCollection - All levels in which the column is used. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for l in self.all_levels(): - if l.Parent.Table.Name == column.Parent.Name and l.Column.Name == column.Name: - yield l - - def used_in_hierarchies(self, column: 'TOM.Column'): - - """ - Shows all hierarchies in which a column is used. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.HierarchyCollection - All hierarchies in which the column is used. 
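# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# used_in_relationships above yields every relationship that touches a table or column,
# which is a useful check before calling remove_object. The table, column and measure
# names are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    key_col = tom.model.Tables['Sales'].Columns['ProductKey']
    for rel in tom.used_in_relationships(object=key_col):
        print(f"{rel.FromTable.Name} -> {rel.ToTable.Name}")
    # remove_object also cleans up translations and perspective membership first.
    tom.remove_object(object=tom.model.Tables['Sales'].Measures['Obsolete Measure'])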
- """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for l in self.all_levels(): - if l.Parent.Table.Name == column.Parent.Name and l.Column.Name == column.Name: - yield l.Parent - - def used_in_sort_by(self, column: 'TOM.Column'): - - """ - Shows all columns in which a column is used for sorting. - - Parameters - ---------- - object : TOM Object - An column object within a semantic model. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.ColumnCollection - All columns in which the column is used for sorting. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - for c in self.model.Tables[column.Parent.Name].Columns: - if c.SortByColumn == column: - yield c - - def used_in_rls(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): - - """ - Identifies the filter expressions which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Rows Allowed'] - - if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def used_in_data_coverage_definition(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): - - """ - Identifies the ... which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
- - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Data Coverage Definition'] - - if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def used_in_calc_item(self, object: Union['TOM.Table', 'TOM.Column', 'TOM.Measure'], dependencies: pd.DataFrame): - - """ - Identifies the ... which reference a given object. - - Parameters - ---------- - object : TOM Object - An object (i.e. table/column) within a semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - - """ - - objType = object.ObjectType - - df_filt = dependencies[dependencies['Object Type'] == 'Calculation Item'] - - if objType == TOM.ObjectType.Table: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Table') & (df_filt['Referenced Table'] == object.Name)] - tbls = fil['Table Name'].unique().tolist() - for t in self.model.Tables: - if t.Name in tbls: - yield t - elif objType == TOM.ObjectType.Column: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Column') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - cols = fil['Full Object Name'].unique().tolist() - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - elif objType == TOM.ObjectType.Measure: - fil = df_filt[(df_filt['Referenced Object Type'] == 'Measure') & (df_filt['Referenced Table'] == object.Parent.Name) & (df_filt['Referenced Object'] == object.Name)] - meas = fil['Object Name'].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - - def hybrid_tables(self): - - """ - Outputs the hybrid tables within a semantic model. - - Parameters - ---------- - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection - All hybrid tables within a semantic model. - """ - - for t in self.model.Tables: - if any(p.Mode == TOM.ModeType.Import for p in t.Partitions): - if any(p.Mode == TOM.ModeType.DirectQuery for p in t.Partitions): - yield t - - def date_tables(self): - - """ - Outputs the tables which are marked as date tables within a semantic model. 
- - Parameters - ---------- - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection - All tables marked as date tables within a semantic model. - """ - - for t in self.model.Tables: - if t.DataCategory == 'Time': - if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): - yield t - - def is_hybrid_table(self, table_name: str): - - """ - Identifies if a table is a hybrid table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is a hybrid table. - """ - - isHybridTable = False - - if any(p.Mode == TOM.ModeType.Import for p in self.model.Tables[table_name].Partitions): - if any(p.Mode == TOM.ModeType.DirectQuery for p in self.model.Tables[table_name].Partitions): - isHybridTable = True - - return isHybridTable - - def is_date_table(self, table_name: str): - - """ - Identifies if a table is marked as a date table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is marked as a date table. - """ - - isDateTable = False - t = self.model.Tables[table_name] - - if t.DataCategory == 'Time': - if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): - isDateTable = True - - return isDateTable - - def mark_as_date_table(self, table_name: str, column_name: str): - - """ - Marks a table as a date table. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the date column in the table. - - Returns - ------- - - """ - - t = self.model.Tables[table_name] - c = t.Columns[column_name] - if c.DataType != TOM.DataType.DateTime: - print(f"{red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type.") - return - - daxQuery = f""" - define measure '{table_name}'[test] = - var mn = MIN('{table_name}'[{column_name}]) - var ma = MAX('{table_name}'[{column_name}]) - var x = COUNTROWS(DISTINCT('{table_name}'[{column_name}])) - var y = DATEDIFF(mn, ma, DAY) + 1 - return if(y = x, 1,0) - - EVALUATE - SUMMARIZECOLUMNS( - "1",[test] - ) - """ - df = fabric.evaluate_dax(dataset=dataset, workspace=workspace, dax_string = daxQuery) - value = df['1'].iloc[0] - if value != '1': - print(f"{red_dot} The '{column_name}' within the '{table_name}' table does not contain contiguous date values.") - return - - # Mark as a date table - t.DataCategory = 'Time' - c.Columns[column_name].IsKey = True - print(f"{green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key.") - - def has_aggs(self): - - """ - Identifies if a semantic model has any aggregations. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has any aggregations. - """ - - hasAggs = False - - for c in self.all_columns(): - if c.AlterateOf is not None: - hasAggs = True - - return hasAggs - - def is_agg_table(self, table_name: str): - - """ - Identifies if a table has aggregations. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table has any aggregations. - """ - - t = self.model.Tables[table_name] - - return any(c.AlternateOf is not None for c in t.Columns) - - def has_hybrid_table(self): - - """ - Identifies if a semantic model has a hybrid table. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has a hybrid table. 
- """ - - hasHybridTable = False - - for t in self.model.Tables: - if self.is_hybrid_table(table_name = t.Name): - hasHybridTable = True - - return hasHybridTable - - def has_date_table(self): - - """ - Identifies if a semantic model has a table marked as a date table. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model has a table marked as a date table. - """ - - hasDateTable = False - - for t in self.model.Tables: - if self.is_date_table(table_name = t.Name): - hasDateTable = True - - return hasDateTable - - def is_direct_lake(self): - - """ - Identifies if a semantic model is in Direct Lake mode. - - Parameters - ---------- - - Returns - ------- - bool - Indicates if the semantic model is in Direct Lake mode. - """ - - return any(p.Mode == TOM.ModeType.DirectLake for t in self.model.Tables for p in t.Partitions) - - def is_field_parameter(self, table_name: str): - - """ - Identifies if a table is a field parameter. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is a field parameter. - """ - - t = self.model.Tables[table_name] - - return any(p.SourceType == TOM.PartitionSourceType.Calculated and 'NAMEOF(' in p.Source.Expression for p in t.Partitions) and all('[Value' in c.SourceColumn for c in t.Columns if c.Type != TOM.ColumnType.RowNumber) and t.Columns.Count == 4 - - def is_auto_date_table(self, table_name: str): - - """ - Identifies if a table is an auto-date table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - Indicates if the table is an auto-date table. - """ - - isAutoDate = False - - t = self.model.Tables[table_name] - - if t.Name.startswith('LocalDateTable_') or t.Name.startswith('DateTableTemplate_'): - if any(p.SourceType == TOM.PartitionSourceType.Calculated for p in t.Partitions): - isAutoDate = True - - return isAutoDate - - def set_kpi(self, measure_name: str, target: Union[int,float,str], lower_bound: float, upper_bound: float, lower_mid_bound: Optional[float] = None, upper_mid_bound: Optional[float] = None, status_type: Optional[str] = None, status_graphic: Optional[str] = None): - - """ - Sets the properties to add/update a KPI for a measure. - - Parameters - ---------- - measure_name : str - Name of the measure. - target : str, int, float - The target for the KPI. This can either be a number or the name of a different measure in the semantic model. - lower_bound: float - The lower bound for the KPI. - upper_bound : float - The upper bound for the KPI. - lower_mid_bound : float, default=None - The lower-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. - upper_mid_bound : float, default=None - The upper-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. - status_type : str, default=None - The status type of the KPI. Options: 'Linear', 'LinearReversed', 'Centered', 'CenteredReversed'. - Defaults to None which resolvs to 'Linear'. - status_graphic : str, default=None - The status graphic for the KPI. - Defaults to 'Three Circles Colored'. 
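# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# The model-level checks above (is_direct_lake, has_date_table, is_hybrid_table, ...) are
# read-only, so the default readonly=True connection is sufficient. The dataset and table
# names are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks') as tom:
    print('Direct Lake mode:', tom.is_direct_lake())
    print('Has a marked date table:', tom.has_date_table())
    print('Calendar is a hybrid table:', tom.is_hybrid_table(table_name='Calendar'))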
-
-        Returns
-        -------
-
-        """
-
-        #https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md
-
-        if measure_name == target:
-            print(f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter.")
-            return
-
-        if status_graphic is None:
-            status_graphic = 'Three Circles Colored'
-
-        statusType = ['Linear', 'LinearReversed', 'Centered', 'CenteredReversed']
-        # Apply the default before normalizing, so a None status_type does not raise.
-        if status_type is None:
-            status_type = 'Linear'
-        status_type = status_type.title().replace(' ','')
-
-        if status_type not in statusType:
-            print(f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}.")
-            return
-
-        if status_type in ['Linear', 'LinearReversed']:
-            if upper_mid_bound is not None or lower_mid_bound is not None:
-                print(f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None.")
-                return
-            elif upper_bound <= lower_bound:
-                print(f"The upper_bound must be greater than the lower_bound.")
-                return
-
-        if status_type in ['Centered', 'CenteredReversed']:
-            if upper_mid_bound is None or lower_mid_bound is None:
-                print(f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types.")
-                return
-            elif upper_bound <= upper_mid_bound:
-                print(f"The upper_bound must be greater than the upper_mid_bound.")
-                return
-            elif upper_mid_bound <= lower_mid_bound:
-                print(f"The upper_mid_bound must be greater than the lower_mid_bound.")
-                return
-            elif lower_mid_bound <= lower_bound:
-                print(f"The lower_mid_bound must be greater than the lower_bound.")
-                return
-
-        try:
-            table_name = next(m.Parent.Name for m in self.all_measures() if m.Name == measure_name)
-        except:
-            print(f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'.")
-            return
-
-        graphics = ['Cylinder', 'Five Bars Colored', 'Five Boxes Colored', 'Gauge - Ascending', 'Gauge - Descending', 'Road Signs', 'Shapes', 'Standard Arrow', 'Three Circles Colored', 'Three Flags Colored', 'Three Stars Colored', 'Three Symbols Uncircled Colored', 'Traffic Light', 'Traffic Light - Single', 'Variance Arrow', 'Status Arrow - Ascending', 'Status Arrow - Descending']
-
-        if status_graphic not in graphics:
-            print(f"The '{status_graphic}' status graphic is not valid. 
Please choose from these options: {graphics}.") - return - - measure_target = True - - try: - float(target) - tgt = str(target) - measure_target = False - except: - try: - tgt = next(format_dax_object_name(m.Parent.Name, m.Name) for m in self.all_measures() if m.Name == target) - except: - print(f"The '{target}' measure does not exist in the '{dataset}' semantic model within the '{workspace}'.") - - if measure_target: - expr = f"var x = [{measure_name}]/[{target}]\nreturn" - else: - expr = f"var x = [{measure_name}\nreturn" - - if status_type == 'Linear': - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_bound},-1,\n\t\tif(x<{upper_bound},0,1)))" - elif status_type == 'LinearReversed': - expr = f"{expr}\nif(isblank(x),blank(),\nif(x<{lower_bound},1,\n\t\tif(x<{upper_bound},0,-1)))" - elif status_type == 'Centered': - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},-1,0),\n\t\t\tif(x<{upper_mid_bound},1,\n\t\t\t\tif(x<{upper_bound}0,-1))))" - elif status_type == 'CenteredReversed': - expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},1,0),\n\t\t\tif(x<{upper_mid_bound},-1,\n\t\t\t\tif(x<{upper_bound}0,1))))" - - kpi = TOM.KPI() - kpi.TargetExpression = tgt - kpi.StatusGraphic = status_graphic - kpi.StatusExpression = expr - - ms = self.model.Tables[table_name].Measures[measure_name] - try: - ms.KPI.TargetExpression = tgt - ms.KPI.StatusGraphic = status_graphic - ms.KPI.StatusExpression = expr - except: - ms.KPI = kpi - - def set_aggregations(self, table_name: str, agg_table_name: str): - - """ - Sets the aggregations (alternate of) for all the columns in an aggregation table based on a base table. - - Parameters - ---------- - table_name : str - Name of the base table. - agg_table_name : str - Name of the aggregation table. - - Returns - ------- - - """ - - for c in self.model.Tables[agg_table_name].Columns: - - dataType = c.DataType - - if dataType in [TOM.DataType.String, TOM.DataType.Boolean, TOM.DataType.DateTime]: - sumType = 'GroupBy' - else: - sumType = 'Sum' - - self.set_alternate_of(table_name = agg_table_name, column_name = c.Name, base_table = table_name, base_column = c.Name, summarization_type = sumType) - - def set_is_available_in_mdx(self, table_name: str, column_name: str, value: Optional[bool] = False): - - """ - Sets the IsAvailableInMdx property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : bool, default=False - The IsAvailableInMdx property value. - - Returns - ------- - - """ - - self.model.Tables[table_name].Columns[column_name].IsAvailableInMdx = value - - def set_summarize_by(self, table_name: str, column_name: str, value: Optional[str] = None): - - """ - Sets the SummarizeBy property on a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : bool, default=None - The SummarizeBy property value. - Defaults to none which resolves to 'Default'. 
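# --- Editor's illustrative sketch (not part of the original TOM.py diff) ---
# set_kpi above builds the KPI target and status expressions from the supplied bounds. With
# the 'Linear' status type only lower_bound and upper_bound are used, as thresholds on the
# [measure]/[target] ratio. The measure names are assumptions for illustration.
from sempy_labs.TOM import connect_semantic_model  # assumed import path

with connect_semantic_model(dataset='AdventureWorks', readonly=False) as tom:
    tom.set_kpi(
        measure_name='Sales Amount',
        target='Sales Target',   # a number, or the name of another measure in the model
        lower_bound=0.8,
        upper_bound=1.1,
        status_type='Linear',
        status_graphic='Three Circles Colored',
    )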
- - Returns - ------- - - """ - - values = ['Default', 'None', 'Sum', 'Min', 'Max', 'Count', 'Average', 'DistinctCount'] - #https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby - - if value is None: - value = 'Default' - value = value.capitalize().replace('Distinctcount', 'DistinctCount').replace('Avg', 'Average') - - if value not in values: - print(f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}.") - return - - self.model.Tables[table_name].Columns[column_name].SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, value) - - def set_direct_lake_behavior(self, direct_lake_behavior: str): - - """ - Sets the Direct Lake Behavior property for a semantic model. - - Parameters - ---------- - direct_lake_behavior : str - The DirectLakeBehavior property value. - - Returns - ------- - - """ - - direct_lake_behavior = direct_lake_behavior.capitalize() - if direct_lake_behavior.startswith('Auto'): - direct_lake_behavior = 'Automatic' - elif direct_lake_behavior.startswith('Directl') or direct_lake_behavior == 'Dl': - direct_lake_behavior = 'DirectLakeOnly' - elif direct_lake_behavior.startswith('Directq') or direct_lake_behavior == 'Dq': - direct_lake_behavior = 'DirectQueryOnly' - - dlValues = ['Automatic', 'DirectLakeOnly', 'DirectQueryOnly'] - - if direct_lake_behavior not in dlValues: - print(f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}.") - return - - self.model.DirectLakeBehavior = System.Enum.Parse(TOM.DirectLakeBehavior, direct_lake_behavior) - - def add_table(self, name: str, description: Optional[str] = None, data_category: Optional[str] = None, hidden: Optional[bool] = False): - - """ - Adds a table to the semantic model. - - Parameters - ---------- - name : str - Name of the table. - description : str, default=None - A description of the table. - data_catgegory : str, default=None - The data category for the table. - hidden : bool, default=False - Whether the table is hidden or visible. - - Returns - ------- - - """ - - t = TOM.Table() - t.Name = name - if description is not None: - t.Description = description - if data_category is not None: - t.DataCategory = data_category - t.Hidden = hidden - self.model.Tables.Add(t) - - def add_calculated_table(self, name: str, expression: str, description: Optional[str] = None, data_category: Optional[str] = None, hidden: Optional[bool] = False): - - """ - Adds a calculated table to the semantic model. - - Parameters - ---------- - name : str - Name of the table. - expression : str - The DAX expression for the calculated table. - description : str, default=None - A description of the table. - data_catgegory : str, default=None - The data category for the table. - hidden : bool, default=False - Whether the table is hidden or visible. - - Returns - ------- - - """ - - par = TOM.Partition() - par.Name = name - - parSource = TOM.CalculatedPartitionSource() - parSource.Expression = expression - par.Source = parSource - - t = TOM.Table() - t.Name = name - if description is not None: - t.Description = description - if data_category is not None: - t.DataCategory = data_category - t.Hidden = hidden - t.Partitions.Add(par) - self.model.Tables.Add(t) - - def add_field_parameter(self, table_name: str, objects: List[str]): - - """ - Adds a table to the semantic model. - - Parameters - ---------- - table_name : str - Name of the table. 
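A hedged usage sketch of the wrapper methods defined above, driven through the `connect_semantic_model` context manager that yields this class; the dataset, workspace, table, and column names are hypothetical.

```python
# Hedged sketch: all object names below are hypothetical.
from sempy_labs.TOM import connect_semantic_model

with connect_semantic_model(dataset='AdventureWorks', workspace='MyWorkspace', readonly=False) as tom:
    # Add a calculated table and stop a numeric column from auto-summarizing.
    tom.add_calculated_table(name='Top Products', expression="TOPN(10, 'Product', [Sales Amount])")
    tom.set_summarize_by(table_name='Sales', column_name='Unit Price', value='None')
```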
- objects : List[str] - The columns/measures to be included in the field parameter. - Columns must be specified as such : 'Table Name'[Column Name]. - Measures may be formatted as '[Measure Name]' or 'Measure Name'. - - Returns - ------- - - """ - - if isinstance(objects, str): - print(f"The 'objects' parameter must be a list of columns/measures.") - return - if len(objects) == 1: - print(f"There must be more than one object (column/measure) within the objects parameter.") - return - - expr = '' - i=0 - for obj in objects: - success = False - for m in self.all_measures(): - if obj == '[' + m.Name + ']' or obj == m.Name: - expr = expr + '\n\t' + '("' + m.Name + '", NAMEOF([' + m.Name + ']), ' + str(i) + '),' - success = True - for c in self.all_columns(): - fullObjName = format_dax_object_name(c.Parent.Name, c.Name) - if obj == fullObjName or obj == c.Parent.Name + '[' + c.Name + ']': - expr = expr + '\n\t' + '("' + c.Name + '", NAMEOF(' + fullObjName + '), ' + str(i) + '),' - success = True - if not success: - print(f"The '{obj}' object was not found in the '{dataset}' semantic model.") - return - else: - i+=1 - - expr = '{' + expr.rstrip(',') + '\n}' - - self.add_calculated_table(name = table_name, expression = expr) - - col2 = table_name + ' Fields' - col3 = table_name + ' Order' - - self.add_calculated_table_column(table_name = table_name, column_name = table_name, source_column = '[Value1]', data_type = 'String', hidden = False ) - self.add_calculated_table_column(table_name = table_name, column_name = col2, source_column = '[Value2]', data_type = 'String', hidden = True ) - self.add_calculated_table_column(table_name = table_name, column_name = col3, source_column = '[Value3]', data_type = 'Int64', hidden = True ) - - self.set_extended_property(self = self, - object = self.model.Tables[table_name].Columns[col2], - extended_property_type = 'Json', - name = 'ParameterMetadata', - value = '{"version":3,"kind":2}') - - rcd = TOM.RelatedColumnDetails() - gpc = TOM.GroupByColumn() - gpc.GroupingColumn = self.model.Tables[table_name].Columns[col2] - rcd.GroupByColumns.Add(gpc) - - # Update column properties - self.model.Tables[table_name].Columns[col2].SortByColumn = self.model.Tables[table_name].Columns[col3] - self.model.Tables[table_name].Columns[table_name].RelatedColumnDetails = rcd - - fpAdded.append(table_name) - - def remove_vertipaq_annotations(self): - - """ - Removes the annotations set using the [set_vertipaq_annotations] function. - - Parameters - ---------- - - Returns - ------- - - """ - - for t in self.model.Tables: - for a in t.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = t, name = a.Name) - for c in t.Columns: - for a in c.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = c, name = a.Name) - for h in t.Hierarchies: - for a in h.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = h, name = a.Name) - for p in t.Partitions: - for a in p.Annotations: - if a.Name.startswith('Vertipaq_'): - self.remove_annotation(object = p, name = a.Name) - for r in self.model.Relationships: - for a in r.Annotations: - if a.Name.startswith('Veripaq_'): - self.remove_annotation(object = r, name = a.Name) - - def set_vertipaq_annotations(self): - - """ - Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model. 
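For reference, the field-parameter expression assembled by `add_field_parameter` looks like the following; this is a standalone sketch of the same string building for a hypothetical input of two measures and one column, not a call into the library.

```python
# Standalone sketch of the DAX expression add_field_parameter builds,
# for a hypothetical objects list of two measures and one column.
objects = ["[Sales Amount]", "[Total Cost]", "'Product'[Color]"]

expr, i = '', 0
for obj in objects:
    if obj.startswith('[') and obj.endswith(']'):          # measure reference
        name = obj[1:-1]
        expr += f'\n\t("{name}", NAMEOF([{name}]), {i}),'
    else:                                                   # 'Table'[Column] reference
        name = obj.split('[')[1][:-1]
        expr += f'\n\t("{name}", NAMEOF({obj}), {i}),'
    i += 1

print('{' + expr.rstrip(',') + '\n}')
# {
#     ("Sales Amount", NAMEOF([Sales Amount]), 0),
#     ("Total Cost", NAMEOF([Total Cost]), 1),
#     ("Color", NAMEOF('Product'[Color]), 2)
# }
```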
- - Parameters - ---------- - - Returns - ------- - - """ - - dfT = fabric.list_tables(dataset = dataset, workspace = workspace, extended=True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended=True) - #intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - dfCSum = dfC.groupby(['Table Name'])['Total Size'].sum().reset_index() - dfTable = pd.merge(dfT[['Name', 'Type', 'Row Count']], dfCSum[['Table Name', 'Total Size']], left_on = 'Name', right_on = 'Table Name', how = 'inner') - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace, extended=True) - dfP['Records per Segment'] = round(dfP['Record Count'] / dfP['Segment Count'],2) - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace, extended=True) - dfR = list_relationships(dataset = dataset, workspace = workspace, extended=True) - - for t in self.model.Tables: - dfT_filt = dfTable[dfTable['Name'] == t.Name] - rowCount = str(dfT_filt['Row Count'].iloc[0]) - totalSize = str(dfT_filt['Total Size'].iloc[0]) - self.set_annotation(object = t, name = 'Vertipaq_RowCount', value = rowCount) - self.set_annotation(object = t, name = 'Vertipaq_TableSize', value = totalSize) - for c in t.Columns: - dfC_filt = dfC[(dfC['Table Name'] == t.Name) & (dfC['Column Name'] == c.Name)] - totalSize = str(dfC_filt['Total Size'].iloc[0]) - dataSize = str(dfC_filt['Data Size'].iloc[0]) - dictSize = str(dfC_filt['Dictionary Size'].iloc[0]) - hierSize = str(dfC_filt['Hierarchy Size'].iloc[0]) - card = str(dfC_filt['Column Cardinality'].iloc[0]) - self.set_annotation(object = c, name = 'Vertipaq_TotalSize', value = totalSize) - self.set_annotation(object = c, name = 'Vertipaq_DataSize', value = dataSize) - self.set_annotation(object = c, name = 'Vertipaq_DictionarySize', value = dictSize) - self.set_annotation(object = c, name = 'Vertipaq_HierarchySize', value = hierSize) - self.set_annotation(object = c, name = 'Vertipaq_Cardinality', value = card) - for p in t.Partitions: - dfP_filt = dfP[(dfP['Table Name'] == t.Name) & (dfP['Partition Name'] == p.Name)] - recordCount = str(dfP_filt['Record Count'].iloc[0]) - segmentCount = str(dfP_filt['Segment Count'].iloc[0]) - rpS = str(dfP_filt['Records per Segment'].iloc[0]) - self.set_annotation(object = p, name = 'Vertipaq_RecordCount', value = recordCount) - self.set_annotation(object = p, name = 'Vertipaq_SegmentCount', value = segmentCount) - self.set_annotation(object = p, name = 'Vertipaq_RecordsPerSegment', value = rpS) - for h in t.Hierarchies: - dfH_filt = dfH[(dfH['Table Name'] == t.Name) & (dfH['Hierarchy Name'] == h.Name)] - usedSize = str(dfH_filt['Used Size'].iloc[0]) - self.set_annotation(object = h, name = 'Vertipaq_UsedSize', value = usedSize) - for r in self.model.Relationships: - dfR_filt = dfR[dfR['Relationship Name'] == r.Name] - relSize = str(dfR_filt['Used Size'].iloc[0]) - self.set_annotation(object = r, name = 'Vertipaq_UsedSize', value = relSize) - - try: - runId = self.get_annotation_value(object = self.model, name = 'Vertipaq_Run') - runId = str(int(runId) + 1) - except: - runId = '1' - self.set_annotation(object = self.model, name = 'Vertipaq_Run', value = runId) - - def row_count(self, object: Union['TOM.Partition', 'TOM.Table']): - - """ - Obtains the row count of a table or partition within a semantic model. - - Parameters - ---------- - object : TOM Object - The table/partition object within the semantic model. - - Returns - ------- - int - Number of rows within the TOM object. 
- """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Table: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RowCount') - elif objType == TOM.ObjectType.Partition: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RecordCount') - - return int(result) - - def records_per_segment(self, object: 'TOM.Partition'): - - """ - Obtains the records per segment of a partition within a semantic model. - - Parameters - ---------- - object : TOM Object - The partition object within the semantic model. - - Returns - ------- - float - Number of records per segment within the partition. - """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Partition: - result = self.get_annotation_value(object = object, name = 'Vertipaq_RecordsPerSegment') - - return float(result) - - def used_size(self, object: Union['TOM.Hierarchy', 'TOM.Relationship']): - - """ - Obtains the used size of a hierarchy or relationship within a semantic model. - - Parameters - ---------- - object : TOM Object - The hierarhcy/relationship object within the semantic model. - - Returns - ------- - int - Used size of the TOM object. - """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Hierarchy: - result = self.get_annotation_value(object = object, name = 'Vertipaq_UsedSize') - elif objType == TOM.ObjectType.Relationship: - result = self.get_annotation_value(object = object, name = 'Vertipaq_UsedSize') - - return int(result) - - def data_size(self, column: 'TOM.Column'): - - """ - Obtains the data size of a column within a semantic model. - - Parameters - ---------- - column : TOM Object - The column object within the semantic model. - - Returns - ------- - int - Data size of the TOM column. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_DataSize') - - return int(result) - - def dictionary_size(self, column: 'TOM.Column'): - - """ - Obtains the dictionary size of a column within a semantic model. - - Parameters - ---------- - column : TOM Object - The column object within the semantic model. - - Returns - ------- - int - Dictionary size of the TOM column. - """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_DictionarySize') - - return int(result) - - def total_size(self, object: Union['TOM.Table', 'TOM.Column']): - - """ - Obtains the data size of a table/column within a semantic model. - - Parameters - ---------- - object : TOM Object - The table/column object within the semantic model. - - Returns - ------- - int - Total size of the TOM table/column. - """ - - objType = object.ObjectType - - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = object, name = 'Vertipaq_TotalSize') - elif objType == TOM.ObjectType.Table: - result = self.get_annotation_value(object = object, name = 'Vertipaq_TotalSize') - - return int(result) - - def cardinality(self, column: 'TOM.Column'): - - """ - Obtains the cardinality of a column within a semantic model. - - Parameters - ---------- - column : TOM Object - The column object within the semantic model. - - Returns - ------- - int - Cardinality of the TOM column. 
- """ - - objType = column.ObjectType - - if objType == TOM.ObjectType.Column: - result = self.get_annotation_value(object = column, name = 'Vertipaq_Cardinality') - - return int(result) - - def depends_on(self, object, dependencies: pd.DataFrame): - - """ - Obtains the objects on which the specified object depends. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - Objects on which the specified object depends. - """ - - objType = object.ObjectType - objName = object.Name - objParentName = object.Parent.Name - - if objType == TOM.ObjectType.Table: - objParentName = objName - - fil = dependencies[(dependencies['Object Type'] == objType) & (dependencies['Table Name'] == objParentName) & (dependencies['Object Name'] == objName)] - meas = fil[fil['Referenced Object Type'] == 'Measure']['Referenced Object'].unique().tolist() - cols = fil[fil['Referenced Object Type'] == 'Column']['Referenced Full Object Name'].unique().tolist() - tbls = fil[fil['Referenced Object Type'] == 'Table']['Referenced Table'].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - for t in self.model.Tables: - if t.Name in tbls: - yield t - - def referenced_by(self, object, dependencies: pd.DataFrame): - - """ - Obtains the objects which reference the specified object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection - Objects which reference the specified object. - """ - - objType = object.ObjectType - objName = object.Name - objParentName = object.Parent.Name - - if objType == TOM.ObjectType.Table: - objParentName = objName - - fil = dependencies[(dependencies['Referenced Object Type'] == objType) & (dependencies['Referenced Table'] == objParentName) & (dependencies['Referenced Object'] == objName)] - meas = fil[fil['Object Type'] == 'Measure']['Object Name'].unique().tolist() - cols = fil[fil['Object Type'].isin(['Column', 'Calc Column'])]['Full Object Name'].unique().tolist() - tbls = fil[fil['Object Type'].isin(['Table', 'Calc Table'])]['Table Name'].unique().tolist() - for m in self.all_measures(): - if m.Name in meas: - yield m - for c in self.all_columns(): - if format_dax_object_name(c.Parent.Name, c.Name) in cols: - yield c - for t in self.model.Tables: - if t.Name in tbls: - yield t - - def fully_qualified_measures(self, object: 'TOM.Measure', dependencies: pd.DataFrame): - - """ - Obtains all fully qualified measure references for a given object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.MeasureCollection - All fully qualified measure references for a given object. 
- """ - - for obj in self.depends_on(object = object, dependencies=dependencies): - if obj.ObjectType == TOM.ObjectType.Measure: - if (obj.Parent.Name + obj.Name in object.Expression) or (format_dax_object_name(obj.Parent.Name, obj.Name) in object.Expression): - yield obj - - def unqualified_columns(self, object: 'TOM.Column', dependencies: pd.DataFrame): - - """ - Obtains all unqualified column references for a given object. - - Parameters - ---------- - object : TOM Object - The TOM object within the semantic model. - dependencies : pandas.DataFrame - A pandas dataframe with the output of the 'get_model_calc_dependencies' function. - - Returns - ------- - Microsoft.AnalysisServices.Tabular.ColumnCollection - All unqualified column references for a given object. - """ - - def create_pattern(a, b): - return r'(? 0: - usingView = True - - return usingView - - def has_incremental_refresh_policy(self, table_name: str): - - """ - Identifies whether a table has an incremental refresh policy. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - bool - An indicator whether a table has an incremental refresh policy. - """ - - hasRP = False - rp = self.model.Tables[table_name].RefreshPolicy - - if rp is not None: - hasRP = True - - return hasRP - - def show_incremental_refresh_policy(self, table_name: str): - - """ - Prints the incremental refresh policy for a table. - - Parameters - ---------- - table_name : str - Name of the table. - - Returns - ------- - - """ - - rp = self.model.Tables[table_name].RefreshPolicy - - if rp is None: - print(f"The '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace does not have an incremental refresh policy.") - else: - print(f"Table Name: {table_name}") - rwGran = str(rp.RollingWindowGranularity).lower() - icGran = str(rp.IncrementalGranularity).lower() - if rp.RollingWindowPeriods > 1: - print(f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date.") - else: - print(f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date.") - if rp.IncrementalPeriods > 1: - print(f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date.") - else: - print(f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date.") - - if rp.Mode == TOM.RefreshPolicyMode.Hybrid: - print(f"{checked} Get the latest data in real time with DirectQuery (Premium only)") - else: - print(f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)") - if rp.IncrementalPeriodsOffset == -1: - print(f"{checked} Only refresh complete days") - else: - print(f"{unchecked} Only refresh complete days") - if len(rp.PollingExpression) > 0: - pattern = r'\[([^\]]+)\]' - match = re.search(pattern, rp.PollingExpression) - if match: - col = match[0][1:-1] - fullCol = format_dax_object_name(table_name, col) - print(f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}") - else: - print(f"{unchecked} Detect data changes") - - def update_incremental_refresh_policy(self, table_name: str, incremental_granularity: str, incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, only_refresh_complete_days: Optional[bool] = False, detect_data_changes_column: Optional[str] = None): - - """ - Updates the incremental refresh policy for a table within a semantic model. 
- - Parameters - ---------- - table_name : str - Name of the table. - incremental_granularity : str - Granularity of the (most recent) incremental refresh range. - incremental_periods : int - Number of periods for the incremental refresh range. - rolling_window_granularity : str - Target granularity of the rolling window for the whole semantic model. - rolling_window_periods : int - Number of periods for the rolling window for the whole semantic model. - only_refresh_complete_days : bool, default=False - Lag or leading periods from Now() to the rolling window head. - detect_data_changes_column : str, default=None - The column to use for detecting data changes. - Defaults to None which resolves to not detecting data changes. - - Returns - ------- - - """ - - if not self.has_incremental_refresh_policy(table_name = table_name): - print(f"The '{table_name}' table does not have an incremental refresh policy.") - return - - incGran = ['Day', 'Month', 'Quarter', 'Year'] - - incremental_granularity = incremental_granularity.capitalize() - rolling_window_granularity = rolling_window_granularity.capitalize() - - if incremental_granularity not in incGran: - print(f"{red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}.") - return - if rolling_window_granularity not in incGran: - print(f"{red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}.") - return - - if rolling_window_periods < 1: - print(f"{red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0.") - return - if incremental_periods < 1: - print(f"{red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0.") - return - - t = self.model.Tables[table_name] - - if detect_data_changes_column is not None: - dc = t.Columns[detect_data_changes_column] - - if dc.DataType != TOM.DataType.DateTime: - print(f"{red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type.") - return - - rp = TOM.BasicRefreshPolicy() - rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse(TOM.RefreshGranularityType, incremental_granularity) - rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse(TOM.RefreshGranularityType, rolling_window_granularity) - rp.SourceExpression = t.RefreshPolicy.SourceExpression - - if only_refresh_complete_days: - rp.IncrementalPeriodsOffset = -1 - else: - rp.IncrementalPeriodOffset = 0 - - if detect_data_changes_column is not None: - fullDC = format_dax_object_name(table_name, detect_data_changes_column) - ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" - rp.PollingExpression = ddcExpr - else: - rp.PollingExpression = None - - t.RefreshPolicy = rp - - self.show_incremental_refresh_policy(table_name=table_name) - - def add_incremental_refresh_policy(self, table_name: str, column_name: str, start_date: str, end_date: str, incremental_granularity: str, incremental_periods: int, rolling_window_granularity: str, rolling_window_periods: int, only_refresh_complete_days: Optional[bool] = False, detect_data_changes_column: Optional[str] = None): - - """ - Adds anincremental refresh policy for a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. 
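A hedged sketch of `update_incremental_refresh_policy`, reshaping an existing policy to keep two years of archive and refresh the last seven days; the table name is hypothetical.

```python
# Hedged sketch: the 'Sales' table is hypothetical.
from sempy_labs.TOM import connect_semantic_model

with connect_semantic_model(dataset='AdventureWorks', workspace='MyWorkspace', readonly=False) as tom:
    tom.update_incremental_refresh_policy(
        table_name='Sales',
        incremental_granularity='Day', incremental_periods=7,
        rolling_window_granularity='Year', rolling_window_periods=2,
        only_refresh_complete_days=True)
```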
- column_name : str - The DateTime column to be used for the RangeStart and RangeEnd parameters. - start_date : str - The date to be used for the RangeStart parameter. - end_date : str - The date to be used for the RangeEnd parameter. - incremental_granularity : str - Granularity of the (most recent) incremental refresh range. - incremental_periods : int - Number of periods for the incremental refresh range. - rolling_window_granularity : str - Target granularity of the rolling window for the whole semantic model. - rolling_window_periods : int - Number of periods for the rolling window for the whole semantic model. - only_refresh_complete_days : bool, default=False - Lag or leading periods from Now() to the rolling window head. - detect_data_changes_column : str, default=None - The column to use for detecting data changes. - Defaults to None which resolves to not detecting data changes. - - Returns - ------- - - """ - - #https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure - - incGran = ['Day', 'Month', 'Quarter', 'Year'] - - incremental_granularity = incremental_granularity.capitalize() - rolling_window_granularity = rolling_window_granularity.capitalize() - - if incremental_granularity not in incGran: - print(f"{red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}.") - return - if rolling_window_granularity not in incGran: - print(f"{red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}.") - return - - if rolling_window_periods < 1: - print(f"{red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0.") - return - if incremental_periods < 1: - print(f"{red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0.") - return - - date_format = '%m/%d/%Y' - - date_obj_start = datetime.strptime(start_date, date_format) - start_year = date_obj_start.year - start_month = date_obj_start.month - start_day = date_obj_start.day - - date_obj_end = datetime.strptime(end_date, date_format) - end_year = date_obj_end.year - end_month = date_obj_end.month - end_day = date_obj_end.day - - if date_obj_end <= date_obj_start: - print(f"{red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'.") - return - - t = self.model.Tables[table_name] - - c = t.Columns[column_name] - fcName = format_dax_object_name(table_name, column_name) - dType = c.DataType - - if dType != TOM.DataType.DateTime: - print(f"{red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type.") - return - - if detect_data_changes_column is not None: - dc = t.Columns[detect_data_changes_column] - dcType = dc.DataType - - if dcType != TOM.DataType.DateTime: - print(f"{red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type.") - return - - # Start changes: - - # Update partition expression - i=0 - for p in t.Partitions: - if p.SourceType != TOM.PartitionSourceType.M: - print(f"{red_dot} Invalid partition source type. 
Incremental refresh can only be set up if the table's partition is an M-partition.") - return - elif i==0: - text = p.Expression - text = text.rstrip() - - ind = text.rfind(' ') + 1 - obj = text[ind:] - pattern = r"in\s*[^ ]*" - matches = list(re.finditer(pattern, text)) - - if matches: - last_match = matches[-1] - text_before_last_match = text[:last_match.start()] - - print(text_before_last_match) - else: - print(f"{red_dot} Invalid M-partition expression.") - return - - endExpr = f'#"Filtered Rows IR" = Table.SelectRows({obj}, each [{column_name}] >= RangeStart and [{column_name}] <= RangeEnd)\n#"Filtered Rows IR"' - finalExpr = text_before_last_match + endExpr - - p.Expression = finalExpr - i+=1 - - # Add expressions - self.add_expression(name = 'RangeStart', expression = f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]') - self.add_expression(name = 'RangeEnd', expression = f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]') - - # Update properties - rp = TOM.BasicRefreshPolicy() - rp.IncrementalPeriods = incremental_periods - rp.IncrementalGranularity = System.Enum.Parse(TOM.RefreshGranularityType, incremental_granularity) - rp.RollingWindowPeriods = rolling_window_periods - rp.RollingWindowGranularity = System.Enum.Parse(TOM.RefreshGranularityType, rolling_window_granularity) - - if only_refresh_complete_days: - rp.IncrementalPeriodsOffset = -1 - else: - rp.IncrementalPeriodOffset = 0 - - if detect_data_changes_column is not None: - fullDC = format_dax_object_name(table_name, detect_data_changes_column) - ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" - rp.PollingExpression = ddcExpr - - t.RefreshPolicy = rp - - self.show_incremental_refresh_policy(table_name=table_name) - - def apply_refresh_policy(self, table_name: str, effective_date: Optional[datetime] = None, refresh: Optional[bool] = True, max_parallelism: Optional[int] = 0): - - """ - Applies the incremental refresh policy for a table within a semantic model. - - Parameters - ---------- - table_name : str - Name of the table. - effective_date : DateTime, default=None - The effective date that is used when calculating the partitioning scheme. - refresh : bool, default=True - An indication if partitions of the table should be refreshed or not; the default behavior is to do the refresh. - max_parallelism : int, default=0 - The degree of parallelism during the refresh execution. - - Returns - ------- - - """ - - self.model.Tables[table_name].ApplyRefreshPolicy(effectiveDate = effective_date, refresh = refresh, maxParallelism = max_parallelism) - - def set_data_coverage_definition(self, table_name: str, partition_name: str, expression: str): - - """ - Sets the data coverage definition for a partition. - - Parameters - ---------- - table_name : str - Name of the table. - partition_name : str - Name of the partition. - expression : str - DAX expression containing the logic for the data coverage definition. 
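A hedged sketch of creating a policy from scratch with `add_incremental_refresh_policy` and then applying it; the table and column names are hypothetical, and the dates follow the '%m/%d/%Y' format parsed above.

```python
# Hedged sketch: table/column names are hypothetical; dates use '%m/%d/%Y'.
from sempy_labs.TOM import connect_semantic_model

with connect_semantic_model(dataset='AdventureWorks', workspace='MyWorkspace', readonly=False) as tom:
    tom.add_incremental_refresh_policy(
        table_name='Sales', column_name='OrderDate',
        start_date='01/01/2020', end_date='12/31/2024',
        incremental_granularity='Month', incremental_periods=12,
        rolling_window_granularity='Year', rolling_window_periods=5)
    tom.apply_refresh_policy(table_name='Sales')
```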
- - Returns - ------- - - """ - - doc = 'https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions' - - t = self.model.Tables[table_name] - p = t.Partitions[partition_name] - - ht = self.is_hybrid_table(table_name = table_name) - - if not ht: - print(f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}.") - return - if p.Mode != TOM.ModeType.DirectQuery: - print(f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}.") - return - - dcd = TOM.DataCoverageDefinition() - dcd.Expression = expression - p.DataCoverageDefinition = dcd - - def set_encoding_hint(self, table_name: str, column_name: str, value: str): - - """ - Sets the encoding hint for a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : str - Encoding hint value. Options: 'Value', 'Hash', 'Default'. - - Returns - ------- - - """ - - values = ['Default', 'Hash', 'Value'] - value = value.capitalize() - - if value not in values: - print(f"{red_dot} Invalid encoding hint value. Please choose from these options: {values}.") - return - - self.model.Tables[table_name].Columns[column_name].EncodingHint = System.Enum.Parse(TOM.EncodingHintType, value) - - def set_data_type(self, table_name: str, column_name: str, value: str): - - """ - Sets the data type for a column. - - Parameters - ---------- - table_name : str - Name of the table. - column_name : str - Name of the column. - value : str - The data type. - - Returns - ------- - - """ - - values = ['Binary', 'Boolean', 'DateTime', 'Decimal', 'Double', 'Int64', 'String'] - - value = value.replace(' ','').capitalize() - if value == 'Datetime': - value = 'DateTime' - elif value.startswith('Int'): - value = 'Int64' - elif value.startswith('Bool'): - value = 'Boolean' - - if value not in values: - print(f"{red_dot} Invalid data type. Please choose from these options: {values}.") - return - - self.model.Tables[table_name].Columns[column_name].DataType = System.Enum.Parse(TOM.DataType, value) - - def add_time_intelligence(self, measure_name: str, date_table: str, time_intel: Union[str, List[str]]): - - """ - Adds time intelligence measures - - Parameters - ---------- - measure_name : str - Name of the measure - date_table : str - Name of the date table. - time_intel : str, List[str] - Time intelligence measures to create (i.e. MTD, YTD, QTD). - - Returns - ------- - - """ - - table_name = None - time_intel_options = ['MTD', 'QTD', 'YTD'] - - if isinstance(time_intel, str): - time_intel = [time_intel] - - # Validate time intelligence variations - for t in time_intel: - t = t.capitalize() - if t not in [time_intel_options]: - print(f"The '{t}' time intelligence variation is not supported. 
Valid options: {time_intel_options}.") - return - - # Validate measure and extract table name - for m in self.all_measures(): - if m.Name == measure_name: - table_name = m.Parent.Name - - if table_name is None: - print(f"The '{measure_name}' is not a valid measure in the '{dataset}' semantic model within the '{workspace}' workspace.") - return - - # Validate date table - if not self.is_date_table(date_table): - print(f"{red_dot} The '{date_table}' table is not a valid date table in the '{dataset}' wemantic model within the '{workspace}' workspace.") - return - - # Extract date key from date table - for c in self.all_columns(): - if c.Parent.Name == date_table and c.IsKey: - date_key = c.Name - - # Create the new time intelligence measures - for t in time_intel: - if t == 'MTD': - expr = f"CALCULATE([{measure_name}],DATES{time_intel}('{date_table}'[{date_key}]))" - new_meas_name = f"{measure_name} {t}" - self.add_measure(table_name = table_name, measure_name = new_meas_name, expression = expr) - - def close(self): - if not readonly and self.model is not None: - self.model.SaveChanges() - - if len(fpAdded) > 0: - refresh_semantic_model(dataset = dataset, tables = fpAdded, workspace = workspace) - self.model = None - - tw = TOMWrapper(dataset = dataset, workspace = workspace, readonly = readonly) - try: - yield tw - finally: - tw.close() diff --git a/sempy_labs/Translations.py b/sempy_labs/Translations.py deleted file mode 100644 index 0f389ce3..00000000 --- a/sempy_labs/Translations.py +++ /dev/null @@ -1,228 +0,0 @@ -import pandas as pd -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def language_validate(language: str): - - """ - Validateds that the language specified exists within the supported langauges. - - Parameters - ---------- - language : str - The language code. - - Returns - ------- - bool - A True/False indication as to whether the language code is supported. - """ - - url = 'https://learn.microsoft.com/azure/ai-services/translator/language-support' - - tables = pd.read_html(url) - df = tables[0] - - df_filt = df[df['Language code'] == language] - - df_filt2 = df[df['Language'] == language.capitalize()] - - if len(df_filt) == 1: - lang = df_filt['Language'].iloc[0] - elif len(df_filt2) == 1: - lang = df_filt2['Language'].iloc[0] - else: - print(f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}.") - return - - return lang - -@log -def translate_semantic_model(dataset: str, languages: Union[str, List[str]], exclude_characters: Optional[str] = None, workspace: Optional[str] = None): - - """ - Translates names, descriptions, display folders for all objects in a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - languages : str, List[str] - The language code(s) in which to translate the semantic model. - exclude_characters : str - A string specifying characters which will be replaced by a space in the translation text when sent to the translation service. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
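A hedged sketch of `add_time_intelligence`; per the validation above, only the 'MTD', 'QTD', and 'YTD' variations are intended to be supported. The measure and date-table names are hypothetical.

```python
# Hedged sketch with hypothetical measure and date-table names.
from sempy_labs.TOM import connect_semantic_model

with connect_semantic_model(dataset='AdventureWorks', workspace='MyWorkspace', readonly=False) as tom:
    tom.add_time_intelligence(measure_name='Sales Amount',
                              date_table='Date',
                              time_intel=['MTD', 'QTD', 'YTD'])
```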
- - Returns - ------- - - """ - - from synapse.ml.services import Translate - from pyspark.sql.functions import col, flatten - from pyspark.sql import SparkSession - from .TOM import connect_semantic_model - - if isinstance(languages, str): - languages = [languages] - - dfPrep = pd.DataFrame(columns=['Object Type', 'Name', 'Description', 'Display Folder']) - - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: - - if exclude_characters is None: - for o in tom.model.Tables: - new_data = {'Object Type': 'Table', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': None, 'TDisplay Folder': None} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_columns(): - new_data = {'Object Type': 'Column', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_measures(): - new_data = {'Object Type': 'Measure', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_hierarchies(): - new_data = {'Object Type': 'Hierarchy', 'Name': o.Name, 'TName': o.Name, 'Description': o.Description, 'TDescription': o.Description, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': o.DisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - else: - for o in tom.model.Tables: - oName = o.Name - oDescription = o.Description - for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - new_data = {'Object Type': 'Table', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': None, 'TDisplay Folder': None} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_columns(): - oName = o.Name - oDescription = o.Description - oDisplayFolder = o.DisplayFolder - for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Column', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_measures(): - oName = o.Name - oDescription = o.Description - oDisplayFolder = o.DisplayFolder - for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Measure', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for o in tom.all_hierarchies(): - oName = o.Name - oDescription = o.Description - oDisplayFolder = o.DisplayFolder - for s in exclude_characters: - oName = oName.replace(s, ' ') - oDescription = oDescription.replace(s, ' ') - 
oDisplayFolder = oDisplayFolder.replace(s, ' ') - new_data = {'Object Type': 'Hierarchy', 'Name': o.Name, 'TName': oName, 'Description': o.Description, 'TDescription': oDescription, 'Display Folder': o.DisplayFolder, 'TDisplay Folder': oDisplayFolder} - dfPrep = pd.concat([dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - spark = SparkSession.builder.getOrCreate() - df = spark.createDataFrame(dfPrep) - - columns = ['Name', 'Description', 'Display Folder'] - - for clm in columns: - columnToTranslate = f"T{clm}" - translate = ( - Translate() - .setTextCol(columnToTranslate) - .setToLanguage(languages) - .setOutputCol("translation") - .setConcurrency(5) - ) - - transDF = (translate - .transform(df) - .withColumn("translation", flatten(col("translation.translations"))) - .withColumn("translation", col("translation.text")) - .select('Object Type', clm, columnToTranslate, 'translation')) - - df_panda = transDF.toPandas() - print(f"{in_progress} Translating {clm}s...") - - for lang in languages: - i = languages.index(lang) - tom.add_translation(language = lang) - print(f"{in_progress} Translating into the '{lang}' language...") - - for t in tom.model.Tables: - if t.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Table') & (df_panda['Name'] == t.Name)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = t, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set for the '{lang}' language on the '{t.Name}' table.") - elif clm == 'Description' and t.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Table') & (df_panda['Description'] == t.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = t, language = lang, property = 'Description', value = tr) - for c in t.Columns: - if c.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Name'] == c.Name)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set on the '{c.Name}' column within the {t.Name}' table.") - elif clm == 'Description' and c.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Description'] == c.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and c.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Column') & (df_panda['Display Folder'] == c.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = c, language = lang, property = 'Display Folder', value = tr) - for h in t.Hierarchies: - if h.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Name'] == h.Name)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language = lang, property = 'Name', value = tr) - elif clm == 'Description' and h.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Description'] == h.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language 
= lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and h.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Hierarchy') & (df_panda['Display Folder'] == h.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = h, language = lang, property = 'Display Folder', value = tr) - for ms in t.Measures: - if ms.IsHidden == False: - if clm == 'Name': - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Name'] == ms.Name)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Name', value = tr) - print(f"{green_dot} Translation '{tr}' set on the '{ms.Name}' column within the {t.Name}' table.") - elif clm == 'Description' and ms.Description is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Description'] == ms.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Description', value = tr) - elif clm == 'Display Folder' and ms.DisplayFolder is not None: - df_filt = df_panda[(df_panda['Object Type'] == 'Measure') & (df_panda['Display Folder'] == ms.Description)] - if len(df_filt) == 1: - tr = df_filt['translation'].str[i].iloc[0] - tom.set_translation(object = ms, language = lang, property = 'Display Folder', value = tr) diff --git a/sempy_labs/UpdateDirectLakeModelLakehouseConnection.py b/sempy_labs/UpdateDirectLakeModelLakehouseConnection.py deleted file mode 100644 index 4f45c309..00000000 --- a/sempy_labs/UpdateDirectLakeModelLakehouseConnection.py +++ /dev/null @@ -1,71 +0,0 @@ -import sempy -import sempy.fabric as fabric -from .GetSharedExpression import get_shared_expression -from .HelperFunctions import resolve_lakehouse_name -from .TOM import connect_semantic_model -from typing import List, Optional, Union - -def update_direct_lake_model_lakehouse_connection(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): - - """ - Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name in which the semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - lakehouse : str, default=None - The Fabric lakehouse used by the Direct Lake semantic model. - Defaults to None which resolves to the lakehouse attached to the notebook. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
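A hedged sketch of calling `translate_semantic_model` as defined above: translate a model into two languages while stripping characters that should not be sent to the translation service. The dataset and workspace names are hypothetical.

```python
# Hedged sketch: dataset/workspace names are hypothetical.
from sempy_labs.Translations import translate_semantic_model

translate_semantic_model(
    dataset='AdventureWorks',
    languages=['it', 'fr'],
    exclude_characters='_-',
    workspace='MyWorkspace')
```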
- - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse_workspace == None: - lakehouse_workspace = workspace - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Check if lakehouse is valid - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'Lakehouse') - dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] - - if len(dfI_filt) == 0: - print(f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace.") - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] - - if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models.") - else: - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: - - shEx = get_shared_expression(lakehouse,lakehouse_workspace) - try: - tom.model.Expressions['DatabaseQuery'].Expression = shEx - print(f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace.") - except: - print(f"ERROR: The expression in the '{dataset}' semantic model was not updated.") - - - - diff --git a/sempy_labs/Vertipaq.py b/sempy_labs/Vertipaq.py deleted file mode 100644 index f88e08b3..00000000 --- a/sempy_labs/Vertipaq.py +++ /dev/null @@ -1,571 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from IPython.display import display, HTML -import zipfile, os, shutil, datetime, warnings -from pyspark.sql import SparkSession -from .HelperFunctions import format_dax_object_name, get_direct_lake_sql_endpoint, resolve_lakehouse_name -from .ListFunctions import list_relationships -from .GetLakehouseTables import get_lakehouse_tables -from .Lakehouse import lakehouse_attached -from typing import List, Optional, Union -from sempy._utils._log import log - -@log -def vertipaq_analyzer(dataset: str, workspace: Optional[str] = None, export: Optional[str] = None, lakehouse_workspace: Optional[str] = None, read_stats_from_data: Optional[bool] = False): - - """ - Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name in which the semantic model exists. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - export : str, default=None - Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function. - Specifying 'table' will export the results to delta tables (appended) in your lakehouse. - Default value: None. - lakehouse_workspace : str, default=None - The Fabric workspace used by the lakehouse (for Direct Lake semantic models). - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
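A hedged sketch of `update_direct_lake_model_lakehouse_connection`, repointing a Direct Lake model at a different lakehouse; all object names are hypothetical.

```python
# Hedged sketch: all names below are hypothetical.
from sempy_labs.UpdateDirectLakeModelLakehouseConnection import update_direct_lake_model_lakehouse_connection

update_direct_lake_model_lakehouse_connection(
    dataset='AdventureWorks',
    workspace='MyWorkspace',
    lakehouse='MyLakehouse',
    lakehouse_workspace='MyLakehouseWorkspace')
```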
- read_stats_from_data : bool, default=False - Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse). - - Returns - ------- - - """ - - pd.options.mode.copy_on_write = True - warnings.filterwarnings("ignore", message="createDataFrame attempted Arrow optimization*") - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - - if lakehouse_workspace == None: - lakehouse_workspace = workspace - - dfT = fabric.list_tables(dataset = dataset, extended=True, workspace = workspace) - dfT.rename(columns={'Name': 'Table Name'}, inplace=True) - dfC = fabric.list_columns(dataset = dataset, extended=True, workspace = workspace) - dfC['Column Object'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC.rename(columns={'Column Cardinality': 'Cardinality'}, inplace=True) - dfH = fabric.list_hierarchies(dataset = dataset, extended=True, workspace = workspace) - dfR = list_relationships(dataset = dataset, extended=True, workspace = workspace) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - dfP = fabric.list_partitions(dataset = dataset, extended=True, workspace = workspace) - dfD = fabric.list_datasets(workspace = workspace, additional_xmla_properties=['CompatibilityLevel','Model.DefaultMode']) - dfD = dfD[dfD['Dataset Name'] == dataset] - dfD['Compatibility Level'] = dfD['Compatibility Level'].astype(int) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) - dfR['Missing Rows'] = None - - # Direct Lake - if read_stats_from_data: - if isDirectLake: - dfC = pd.merge(dfC, dfP[['Table Name', 'Query', 'Source Type']], on='Table Name', how='left') - dfC_flt = dfC[(dfC['Source Type'] == 'Entity') & (~dfC['Column Name'].str.startswith('RowNumber-'))] - sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - - # Get lakehouse name from SQL Endpoint ID - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] - - if len(dfI_filt) == 0: - print(f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. 
Please update the lakehouse_workspace parameter.") - else: - lakehouseName = dfI_filt['Display Name'].iloc[0] - - current_workspace_id = fabric.get_workspace_id() - current_workspace = fabric.resolve_workspace_name(current_workspace_id) - if current_workspace != lakehouse_workspace: - lakeTables = get_lakehouse_tables(lakehouse = lakehouseName, workspace = lakehouse_workspace) - - sql_statements = [] - spark = SparkSession.builder.getOrCreate() - # Loop through tables - for lakeTName in dfC_flt['Query'].unique(): - query = 'SELECT ' - columns_in_table = dfC_flt.loc[dfC_flt['Query'] == lakeTName, 'Source'].unique() - - # Loop through columns within those tables - for scName in columns_in_table: - query = query + f"COUNT(DISTINCT({scName})) AS {scName}, " - - query = query[:-2] - if lakehouse_workspace == current_workspace: - query = query + f" FROM {lakehouseName}.{lakeTName}" - else: - lakeTables_filt = lakeTables[lakeTables['Table Name'] == lakeTName] - tPath = lakeTables_filt['Location'].iloc[0] - - df = spark.read.format("delta").load(tPath) - tempTableName = 'delta_table_' + lakeTName - df.createOrReplaceTempView(tempTableName) - query = query + f" FROM {tempTableName}" - sql_statements.append((lakeTName, query)) - - for o in sql_statements: - tName = o[0] - query = o[1] - - df = spark.sql(query) - - for column in df.columns: - x = df.collect()[0][column] - for i, r in dfC.iterrows(): - if r['Query'] == tName and r['Source'] == column: - dfC.at[i, 'Cardinality'] = x - - # Remove column added temporarily - dfC.drop(columns=['Query', 'Source Type'], inplace=True) - - # Direct Lake missing rows - dfR = pd.merge(dfR, dfP[['Table Name', 'Query']], left_on = 'From Table', right_on = 'Table Name', how = 'left') - dfR.rename(columns={'Query': 'From Lake Table'}, inplace=True) - dfR.drop(columns=['Table Name'], inplace=True) - dfR = pd.merge(dfR, dfP[['Table Name', 'Query']], left_on = 'To Table', right_on = 'Table Name', how = 'left') - dfR.rename(columns={'Query': 'To Lake Table'}, inplace=True) - dfR.drop(columns=['Table Name'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Source']], left_on = 'From Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Source': 'From Lake Column'}, inplace=True) - dfR.drop(columns=['Column Object'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Source']], left_on = 'To Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Source': 'To Lake Column'}, inplace=True) - dfR.drop(columns=['Column Object'], inplace=True) - - spark = SparkSession.builder.getOrCreate() - for i, r in dfR.iterrows(): - fromTable = r['From Lake Table'] - fromColumn = r['From Lake Column'] - toTable= r['To Lake Table'] - toColumn = r['To Lake Column'] - - if lakehouse_workspace == current_workspace: - query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" - else: - tempTableFrom = 'delta_table_' + fromTable - tempTableTo = 'delta_table_' + toTable - - query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" - - #query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" - - df = spark.sql(query) - missingRows = df.collect()[0][0] - dfR.at[i, 'Missing Rows'] = missingRows - - dfR['Missing 
Rows'] = dfR['Missing Rows'].astype(int) - else: - # Calculate missing rows using DAX for non-direct lake - for i, r in dfR.iterrows(): - fromTable = r['From Table'] - fromColumn = r['From Column'] - toTable= r['To Table'] - toColumn = r['To Column'] - isActive = bool(r['Active']) - fromObject = format_dax_object_name(fromTable, fromColumn) - toObject= format_dax_object_name(toTable, toColumn) - - missingRows = 0 - - query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)" - - if isActive == False: # add userelationship - query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)" - - result = fabric.evaluate_dax(dataset = dataset, dax_string = query, workspace = workspace) - - try: - missingRows = result.iloc[0,0] - except: - pass - - dfR.at[i, 'Missing Rows'] = missingRows - dfR['Missing Rows'] = dfR['Missing Rows'].astype(int) - - dfTP = dfP.groupby('Table Name')['Partition Name'].count().reset_index() - dfTP.rename(columns={'Partition Name': 'Partitions'}, inplace=True) - dfTC = dfC.groupby('Table Name')['Column Name'].count().reset_index() - dfTC.rename(columns={'Column Name': 'Columns'}, inplace=True) - - total_size = dfC['Total Size'].sum() - table_sizes = dfC.groupby('Table Name')['Total Size'].sum().reset_index() - table_sizes.rename(columns={'Total Size': 'Table Size'}, inplace=True) - - # Columns - dfC_filt = dfC[~dfC['Column Name'].str.startswith('RowNumber-')] - dfC_filt['% DB'] = round((dfC_filt['Total Size'] / total_size) * 100,2) - dfC_filt = pd.merge(dfC_filt, table_sizes, on = 'Table Name', how = 'left') - dfC_filt['% Table'] = round((dfC_filt['Total Size'] / dfC_filt['Table Size']) * 100,2) - columnList = ['Table Name', 'Column Name', 'Type', 'Cardinality', 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size','% Table', '% DB', 'Data Type', 'Encoding', 'Is Resident', 'Temperature', 'Last Accessed'] - - colSize = dfC_filt[columnList].sort_values(by='Total Size', ascending=False) - temp = dfC_filt[columnList].sort_values(by='Temperature', ascending=False) - colSize.reset_index(drop=True, inplace=True) - temp.reset_index(drop=True, inplace=True) - - export_Col = colSize.copy() - - intList = ['Cardinality', 'Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - pctList = ['% Table', '% DB'] - colSize[intList] = colSize[intList].applymap('{:,}'.format) - temp[intList] = temp[intList].applymap('{:,}'.format) - colSize[pctList] = colSize[pctList].applymap('{:.2f}%'.format) - temp[pctList] = temp[pctList].applymap('{:.2f}%'.format) - - # Tables - intList = ['Total Size', 'Data Size', 'Dictionary Size', 'Hierarchy Size'] - dfCSum = dfC.groupby(['Table Name'])[intList].sum().reset_index() - dfCSum['% DB'] = round((dfCSum['Total Size'] / total_size) * 100,2) - - dfTable = pd.merge(dfT[['Table Name', 'Type', 'Row Count']], dfCSum, on = 'Table Name', how = 'inner') - dfTable = pd.merge(dfTable,dfTP, on = 'Table Name', how = 'left') - dfTable = pd.merge(dfTable,dfTC, on = 'Table Name', how = 'left') - dfTable = dfTable.drop_duplicates() #Drop duplicates (temporary) - dfTable = dfTable.sort_values(by='Total Size', ascending=False) - dfTable.reset_index(drop=True, inplace=True) - export_Table = dfTable.copy() - - intList.extend(['Row Count', 'Partitions', 'Columns']) - dfTable[intList] = dfTable[intList].applymap('{:,}'.format) - pctList = ['% DB'] - dfTable[pctList] = dfTable[pctList].applymap('{:.2f}%'.format) - - ## Relationships - 
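For reference, the missing-rows DAX probe built above renders as follows for a hypothetical inactive relationship; this standalone sketch only mirrors the f-string construction in the code.

```python
# Standalone sketch of the missing-rows DAX probe for a hypothetical
# inactive relationship 'Sales'[ProductKey] -> 'Product'[ProductKey].
fromTable, fromObject, toObject = 'Sales', "'Sales'[ProductKey]", "'Product'[ProductKey]"

query = (f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}')"
         f",userelationship({fromObject},{toObject}),isblank({toObject}))\n)")
print(query)
# evaluate
# summarizecolumns(
# "1",calculate(countrows('Sales'),userelationship('Sales'[ProductKey],'Product'[ProductKey]),isblank('Product'[ProductKey]))
# )
```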
#dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Cardinality']], left_on = 'From Object', right_on = 'Column Object', how = 'left') - dfR.rename(columns={'Cardinality': 'Max From Cardinality'}, inplace=True) - dfR = pd.merge(dfR, dfC[['Column Object', 'Cardinality']], left_on = 'To Object', right_on = 'Column Object', how='left') - dfR.rename(columns={'Cardinality': 'Max To Cardinality'}, inplace=True) - dfR = dfR[['From Object', 'To Object', 'Multiplicity', 'Used Size', 'Max From Cardinality', 'Max To Cardinality', 'Missing Rows']].sort_values(by='Used Size', ascending=False) - dfR.reset_index(drop=True, inplace=True) - export_Rel = dfR.copy() - intList = ['Used Size', 'Max From Cardinality', 'Max To Cardinality', 'Missing Rows'] - if read_stats_from_data == False: - intList.remove('Missing Rows') - dfR[intList] = dfR[intList].applymap('{:,}'.format) - - ## Partitions - dfP = dfP[['Table Name', 'Partition Name', 'Mode', 'Record Count', 'Segment Count']].sort_values(by='Record Count', ascending=False) #, 'Records per Segment' - dfP['Records per Segment'] = round(dfP['Record Count'] / dfP['Segment Count'],2) # Remove after records per segment is fixed - dfP.reset_index(drop=True, inplace=True) - export_Part = dfP.copy() - intList = ['Record Count', 'Segment Count', 'Records per Segment'] - dfP[intList] = dfP[intList].applymap('{:,}'.format) - - ## Hierarchies - dfH_filt = dfH[dfH['Level Ordinal'] == 0] - dfH_filt = dfH_filt[['Table Name', 'Hierarchy Name', 'Used Size']].sort_values(by='Used Size', ascending=False) - dfH_filt.reset_index(drop=True, inplace=True) - export_Hier = dfH_filt.copy() - intList = ['Used Size'] - dfH_filt[intList] = dfH_filt[intList].applymap('{:,}'.format) - - ## Model - if total_size >= 1000000000: - y = total_size / (1024 ** 3) * 1000000000 - elif total_size >= 1000000: - y = total_size / (1024 ** 2) * 1000000 - elif total_size >= 1000: - y = total_size / (1024) * 1000 - y = round(y) - - tblCount = len(dfT) - colCount = len(dfC_filt) - compatLevel = dfD['Compatibility Level'].iloc[0] - defMode = dfD['Model Default Mode'].iloc[0] - - dfModel = pd.DataFrame({'Dataset Name': dataset, 'Total Size': y, 'Table Count': tblCount, 'Column Count': colCount, 'Compatibility Level': compatLevel, 'Default Mode': defMode}, index=[0]) - dfModel.reset_index(drop=True, inplace=True) - export_Model = dfModel.copy() - intList = ['Total Size', 'Table Count', 'Column Count'] - dfModel[intList] = dfModel[intList].applymap('{:,}'.format) - - dataFrames = { - 'dfModel': dfModel, - 'dfTable': dfTable, - 'dfP': dfP, - 'colSize': colSize, - 'temp': temp, - 'dfR': dfR, - 'dfH_filt': dfH_filt - } - - dfs = {} - for fileName, df in dataFrames.items(): - dfs[fileName] = df - - visualize_vertipaq(dfs) - - ### Export vertipaq to delta tables in lakehouse - if export in ['table','zip']: - lakeAttach = lakehouse_attached() - if lakeAttach == False: - print(f"In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. 
Please attach a lakehouse to this notebook.") - return - - if export == 'table': - spark = SparkSession.builder.getOrCreate() - - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id = lakehouse_id, workspace = workspace) - lakeTName = 'vertipaq_analyzer_model' - - lakeT = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeT_filt = lakeT[lakeT['Table Name'] == lakeTName] - - query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" - - if len(lakeT_filt) == 0: - runId = 1 - else: - dfSpark = spark.sql(query) - maxRunId = dfSpark.collect()[0][0] - runId = maxRunId + 1 - - dfMap = { - 'export_Col': ['Columns', export_Col], - 'export_Table': ['Tables', export_Table], - 'export_Part': ['Partitions', export_Part], - 'export_Rel': ['Relationships', export_Rel], - 'export_Hier': ['Hierarchies', export_Hier], - 'export_Model': ['Model', export_Model] - } - - print(f"Saving Vertipaq Analyzer to delta tables in the lakehouse...\n") - now = datetime.datetime.now() - for key, (obj, df) in dfMap.items(): - df['Timestamp'] = now - df['Workspace Name'] = workspace - df['Dataset Name'] = dataset - df['RunId'] = runId - - colName = 'Workspace Name' - df.insert(0, colName, df.pop(colName)) - colName = 'Dataset Name' - df.insert(1, colName, df.pop(colName)) - - df.columns = df.columns.str.replace(' ', '_') - - delta_table_name = f"VertipaqAnalyzer_{obj}".lower() - spark_df = spark.createDataFrame(df) - spark_df.write.mode('append').format('delta').saveAsTable(delta_table_name) - print(f"\u2022 Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table.") - - ### Export vertipaq to zip file within the lakehouse - if export == 'zip': - dataFrames = { - 'dfModel': dfModel, - 'dfTable': dfTable, - 'dfP': dfP, - 'colSize': colSize, - 'temp': temp, - 'dfR': dfR, - 'dfH_filt': dfH_filt - } - - zipFileName = f"{workspace}.{dataset}.zip" - - folderPath = '/lakehouse/default/Files' - subFolderPath = os.path.join(folderPath, 'VertipaqAnalyzer') - ext = '.csv' - if not os.path.exists(subFolderPath): - os.makedirs(subFolderPath, exist_ok=True) - zipFilePath = os.path.join(subFolderPath, zipFileName) - - # Create CSV files based on dataframes - for fileName, df in dataFrames.items(): - filePath = os.path.join(subFolderPath, fileName + ext) - df.to_csv(filePath, index=False) - - # Create a zip file and add CSV files to it - with zipfile.ZipFile(zipFilePath, 'w') as zipf: - for fileName in dataFrames: - filePath = os.path.join(subFolderPath, fileName + ext) - zipf.write(filePath, os.path.basename(filePath)) - - # Clean up: remove the individual CSV files - for fileName, df in dataFrames.items(): - filePath = os.path.join(subFolderPath, fileName) + ext - if os.path.exists(filePath): - os.remove(filePath) - print(f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer\{zipFileName}' in the default lakehouse attached to this notebook.") - -def visualize_vertipaq(dataframes): - - # Tooltips for columns within the visual - data = [ - {'ViewName': 'Model', 'ColumnName': 'Dataset Name', 'Tooltip': 'The name of the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Total Size', 'Tooltip': 'The size of the model (in bytes)'}, - {'ViewName': 'Model', 'ColumnName': 'Table Count', 'Tooltip': 'The number of tables in the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Column Count', 'Tooltip': 'The number of columns in the semantic model'}, - 
{'ViewName': 'Model', 'ColumnName': 'Compatibility Level', 'Tooltip': 'The compatibility level of the semantic model'}, - {'ViewName': 'Model', 'ColumnName': 'Default Mode', 'Tooltip': 'The default query mode of the semantic model'}, - {'ViewName': 'Table', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Table', 'ColumnName': 'Type', 'Tooltip': 'The type of table'}, - {'ViewName': 'Table', 'ColumnName': 'Row Count', 'Tooltip': 'The number of rows in the table'}, - {'ViewName': 'Table', 'ColumnName': 'Total Size', 'Tooltip': 'Data Size + Dictionary Size + Hierarchy Size (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': 'Data Size', 'Tooltip': 'The size of the data for all the columns in this table (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': 'Dictionary Size', 'Tooltip': "The size of the column's dictionary for all columns in this table (in bytes)"}, - {'ViewName': 'Table', 'ColumnName': 'Hierarchy Size', 'Tooltip': 'The size of hierarchy structures for all columns in this table (in bytes)'}, - {'ViewName': 'Table', 'ColumnName': '% DB', 'Tooltip': 'The size of the table relative to the size of the semantic model'}, - {'ViewName': 'Table', 'ColumnName': 'Partitions', 'Tooltip': 'The number of partitions in the table'}, - {'ViewName': 'Table', 'ColumnName': 'Columns', 'Tooltip': 'The number of columns in the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Partition Name', 'Tooltip': 'The name of the partition within the table'}, - {'ViewName': 'Partition', 'ColumnName': 'Mode', 'Tooltip': 'The query mode of the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Record Count', 'Tooltip': 'The number of rows in the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Segment Count', 'Tooltip': 'The number of segments within the partition'}, - {'ViewName': 'Partition', 'ColumnName': 'Records per Segment', 'Tooltip': 'The number of rows per segment'}, - {'ViewName': 'Column', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Column', 'ColumnName': 'Column Name', 'Tooltip': 'The name of the column'}, - {'ViewName': 'Column', 'ColumnName': 'Type', 'Tooltip': 'The type of column'}, - {'ViewName': 'Column', 'ColumnName': 'Cardinality', 'Tooltip': 'The number of unique rows in the column'}, - {'ViewName': 'Column', 'ColumnName': 'Total Size', 'Tooltip': 'Data Size + Dictionary Size + Hierarchy Size (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': 'Data Size', 'Tooltip': 'The size of the data for the column (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': 'Dictionary Size', 'Tooltip': "The size of the column's dictionary (in bytes)"}, - {'ViewName': 'Column', 'ColumnName': 'Hierarchy Size', 'Tooltip': 'The size of hierarchy structures (in bytes)'}, - {'ViewName': 'Column', 'ColumnName': '% Table', 'Tooltip': 'The size of the column relative to the size of the table'}, - {'ViewName': 'Column', 'ColumnName': '% DB', 'Tooltip': 'The size of the column relative to the size of the semantic model'}, - {'ViewName': 'Column', 'ColumnName': 'Data Type', 'Tooltip': 'The data type of the column'}, - {'ViewName': 'Column', 'ColumnName': 'Encoding', 'Tooltip': 'The encoding type for the column'}, - {'ViewName': 'Column', 'ColumnName': 'Is Resident', 'Tooltip': 'Indicates whether the column is in memory or not'}, - {'ViewName': 'Column', 'ColumnName': 'Temperature', 'Tooltip': 'A decimal indicating the frequency and recency of 
queries against the column'}, - {'ViewName': 'Column', 'ColumnName': 'Last Accessed', 'Tooltip': 'The time the column was last queried'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Table Name', 'Tooltip': 'The name of the table'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Hierarchy Name', 'Tooltip': 'The name of the hierarchy'}, - {'ViewName': 'Hierarchy', 'ColumnName': 'Used Size', 'Tooltip': 'The size of user hierarchy structures (in bytes)'}, - {'ViewName': 'Relationship', 'ColumnName': 'From Object', 'Tooltip': 'The from table/column in the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'To Object', 'Tooltip': 'The to table/column in the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Multiplicity', 'Tooltip': 'The cardinality on each side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Used Size', 'Tooltip': 'The size of the relationship (in bytes)'}, - {'ViewName': 'Relationship', 'ColumnName': 'Max From Cardinality', 'Tooltip': 'The number of unique values in the column used in the from side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Max To Cardinality', 'Tooltip': 'The number of unique values in the column used in the to side of the relationship'}, - {'ViewName': 'Relationship', 'ColumnName': 'Missing Rows', 'Tooltip': "The number of rows in the 'from' table which do not map to the key column in the 'to' table"} - ] - - # Create DataFrame - tooltipDF = pd.DataFrame(data) - - #define the dictionary with {"Tab name":df} - df_dict = { - "Model Summary":dataframes['dfModel'], - "Tables":dataframes['dfTable'], - "Partitions": dataframes['dfP'], - "Columns (Total Size)": dataframes['colSize'], - "Columns (Temperature)": dataframes['temp'], - "Relationships": dataframes['dfR'], - "Hierarchies": dataframes['dfH_filt'] - } - - mapping = { - 'Model Summary': 'Model', - 'Tables': 'Table', - 'Partitions': 'Partition', - 'Columns (Total Size)': 'Column', - 'Columns (Temperature)': 'Column', - 'Relationships': 'Relationship', - 'Hierarchies': 'Hierarchy' -} - - # Basic styles for the tabs and tab content - styles = """ - - """ - # JavaScript for tab functionality - script = """ - - """ - - - # HTML for tabs - tab_html = '
' - content_html = '' - for i, (title, df) in enumerate(df_dict.items()): - tab_id = f"tab{i}" - tab_html += f'' - - vw = mapping.get(title) - - df_html = df.to_html() - for col in df.columns: - tt = None - try: - tooltipDF_filt = tooltipDF[(tooltipDF['ViewName'] == vw) & (tooltipDF['ColumnName'] == col)] - tt = tooltipDF_filt['Tooltip'].iloc[0] - except: - pass - df_html = df_html.replace(f'{col}', f'{col}') - content_html += f'

<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>' - tab_html += '</div>
' - - # Display the tabs, tab contents, and run the script - display(HTML(styles + tab_html + content_html + script)) - # Default to open the first tab - display(HTML("")) - -@log -def import_vertipaq_analyzer(folder_path: str, file_name: str): - - """ - Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse. - - Parameters - ---------- - folder_path : str - The folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved. - file_name : str - The file name of the file which contains the vertipaq analyzer info. - - Returns - ------- - str - A visualization of the Vertipaq Analyzer statistics. - """ - - pd.options.mode.copy_on_write = True - - zipFilePath = os.path.join(folder_path, file_name) - extracted_dir = os.path.join(folder_path, 'extracted_dataframes') - - with zipfile.ZipFile(zipFilePath, 'r') as zip_ref: - zip_ref.extractall(extracted_dir) - - # Read all CSV files into a dictionary of DataFrames - dfs = {} - for file_name in zip_ref.namelist(): - df = pd.read_csv(extracted_dir + '/' + file_name) - dfs[file_name] = df - - visualize_vertipaq(dfs) - - # Clean up: remove the extracted directory - shutil.rmtree(extracted_dir) \ No newline at end of file diff --git a/sempy_labs/WarmCache.py b/sempy_labs/WarmCache.py deleted file mode 100644 index b4d340d0..00000000 --- a/sempy_labs/WarmCache.py +++ /dev/null @@ -1,175 +0,0 @@ -import sempy -import sempy.fabric as fabric -import pandas as pd -from tqdm.auto import tqdm -import numpy as np -import time -from .HelperFunctions import format_dax_object_name -from .RefreshSemanticModel import refresh_semantic_model -from .GetMeasureDependencies import get_measure_dependencies -from typing import List, Optional, Union -from sempy._utils._log import log - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -@log -def warm_direct_lake_cache_perspective(dataset: str, perspective: str, add_dependencies: Optional[bool] = False, workspace: Optional[str] = None): - - """ - Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective. - - Parameters - ---------- - dataset : str - Name of the semantic model. - perspective : str - Name of the perspective which contains objects to be used for warming the cache. - add_dependencies : bool, default=False - Includes object dependencies in the cache warming process. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"{red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. 
This function is specifically for semantic models in Direct Lake mode.") - return - - dfPersp = fabric.list_perspectives(dataset = dataset, workspace = workspace) - dfPersp['DAX Object Name'] = format_dax_object_name(dfPersp['Table Name'], dfPersp['Object Name']) - dfPersp_filt = dfPersp[dfPersp['Perspective Name'] == perspective] - - if len(dfPersp_filt) == 0: - print(f"{red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace.") - return - dfPersp_c = dfPersp_filt[dfPersp_filt['Object Type'] == 'Column'] - - column_values = dfPersp_c['DAX Object Name'].tolist() - - if add_dependencies: - # Measure dependencies - md = get_measure_dependencies(dataset, workspace) - md['Referenced Full Object'] = format_dax_object_name(md['Referenced Table'], md['Referenced Object']) - dfPersp_m = dfPersp_filt[(dfPersp_filt['Object Type'] == 'Measure')] - md_filt = md[(md['Object Name'].isin(dfPersp_m['Object Name'].values)) & (md['Referenced Object Type'] == 'Column')] - measureDep = md_filt['Referenced Full Object'].unique() - - # Hierarchy dependencies - dfPersp_h = dfPersp_filt[(dfPersp_filt['Object Type'] == 'Hierarchy')] - dfH = fabric.list_hierarchies(dataset = dataset, workspace = workspace) - dfH['Hierarchy Object'] = format_dax_object_name(dfH['Table Name'], dfH['Hierarchy Name']) - dfH['Column Object'] = format_dax_object_name(dfH['Table Name'], dfH['Column Name']) - dfH_filt = dfH[dfH['Hierarchy Object'].isin(dfPersp_h['DAX Object Name'].values)] - hierarchyDep = dfH_filt['Column Object'].unique() - - # Relationship dependencies - unique_table_names = dfPersp_filt['Table Name'].unique() - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) - dfR['From Object'] = format_dax_object_name(dfR['From Table'], dfR['From Column']) - dfR['To Object'] = format_dax_object_name(dfR['To Table'], dfR['To Column']) - filtered_dfR = dfR[dfR['From Table'].isin(unique_table_names) & dfR['To Table'].isin(unique_table_names)] - - fromObjects = filtered_dfR['From Object'].unique() - toObjects = filtered_dfR['To Object'].unique() - - merged_list = np.concatenate([column_values, measureDep, hierarchyDep, fromObjects, toObjects]) - merged_list_unique = list(set(merged_list)) - - else: - merged_list_unique = column_values - - df = pd.DataFrame(merged_list_unique, columns=['DAX Object Name']) - df[['Table Name', 'Column Name']] = df['DAX Object Name'].str.split('[', expand=True) - df['Table Name'] = df['Table Name'].str[1:-1] - df['Column Name'] = df['Column Name'].str[0:-1] - - tbls = list(set(value.split('[')[0] for value in merged_list_unique)) - - for tableName in (bar := tqdm(tbls)): - filtered_list = [value for value in merged_list_unique if value.startswith(f"{tableName}[")] - bar.set_description(f"Warming the '{tableName}' table...") - css = ','.join(map(str, filtered_list)) - dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))""" - x = fabric.evaluate_dax(dataset = dataset, dax_string = dax, workspace = workspace) - - print(f"{green_dot} The following columns have been put into memory:") - - new_column_order = ['Table Name', 'Column Name', 'DAX Object Name'] - df = df.reindex(columns=new_column_order) - df = df[['Table Name', 'Column Name']].sort_values(by=['Table Name', 'Column Name'], ascending=True) - - return df - -@log -def warm_direct_lake_cache_isresident(dataset: str, workspace: Optional[str] = None): - - """ - Performs a refresh on the semantic model and puts the columns which were in 
memory prior to the refresh back into memory. - - Parameters - ---------- - dataset : str - Name of the semantic model. - workspace : str, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - if not any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()): - print(f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode.") - return - - # Identify columns which are currently in memory (Is Resident = True) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace, extended = True) - dfC['DAX Object Name'] = format_dax_object_name(dfC['Table Name'], dfC['Column Name']) - dfC_filtered = dfC[dfC['Is Resident']] - - if len(dfC_filtered) == 0: - print(f"{yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace.") - return - - # Refresh/frame dataset - refresh_semantic_model(dataset = dataset, refresh_type = 'full', workspace = workspace) - - time.sleep(2) - - tbls = dfC_filtered['Table Name'].unique() - column_values = dfC_filtered['DAX Object Name'].tolist() - - # Run basic query to get columns into memory; completed one table at a time (so as not to overload the capacity) - for tableName in (bar := tqdm(tbls)): - bar.set_description(f"Warming the '{tableName}' table...") - css = ','.join(map(str, column_values)) - dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))""" - x = fabric.evaluate_dax(dataset = dataset, dax_string = dax, workspace = workspace) - - print(f"{green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh.") - - return dfC_filtered[['Table Name', 'Column Name', 'Is Resident', 'Temperature']].sort_values(by=['Table Name', 'Column Name'], ascending=True) diff --git a/sempy_labs/__init__.py b/sempy_labs/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/sempy_labs/shortcuts.py b/sempy_labs/shortcuts.py deleted file mode 100644 index 8a246bff..00000000 --- a/sempy_labs/shortcuts.py +++ /dev/null @@ -1,245 +0,0 @@ -import sempy_labs -import sempy.fabric as fabric -import pandas as pd -from .HelperFunctions import resolve_lakehouse_name, resolve_lakehouse_id -from typing import List, Optional, Union - -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def create_shortcut_onelake(table_name: str, source_lakehouse: str, source_workspace: str, destination_lakehouse: str, destination_workspace: Optional[str] = None, shortcut_name: Optional[str] = None): - - """ - Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to a delta table in OneLake. - - Parameters - ---------- - table_name : str - The table name for which a shortcut will be created. - source_lakehouse : str - The Fabric lakehouse in which the table resides. - source_workspace : str - The name of the Fabric workspace in which the source lakehouse exists. 
- destination_lakehouse : str - The Fabric lakehouse in which the shortcut will be created. - destination_workspace : str, default=None - The name of the Fabric workspace in which the shortcut will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - shortcut_name : str, default=None - The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. - - Returns - ------- - - """ - - sourceWorkspaceId = fabric.resolve_workspace_id(source_workspace) - sourceLakehouseId = resolve_lakehouse_id(source_lakehouse, source_workspace) - - if destination_workspace == None: - destination_workspace = source_workspace - - destinationWorkspaceId = fabric.resolve_workspace_id(destination_workspace) - destinationLakehouseId = resolve_lakehouse_id(destination_lakehouse, destination_workspace) - - if shortcut_name == None: - shortcut_name = table_name - - client = fabric.FabricRestClient() - tablePath = 'Tables/' + table_name - - request_body = { - "path": 'Tables', - "name": shortcut_name.replace(' ',''), - "target": { - "oneLake": { - "workspaceId": sourceWorkspaceId, - "itemId": sourceLakehouseId, - "path": tablePath} - } - } - - try: - response = client.post(f"/v1/workspaces/{destinationWorkspaceId}/items/{destinationLakehouseId}/shortcuts",json=request_body) - if response.status_code == 201: - print(f"{green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace.") - else: - print(response.status_code) - except Exception as e: - print(f"{red_dot} Failed to create a shortcut for the '{table_name}' table: {e}") - -def create_shortcut(shortcut_name: str, location: str, subpath: str, source: str, connection_id: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): - - """ - Creates a [shortcut](https://learn.microsoft.com/fabric/onelake/onelake-shortcuts) to an ADLS Gen2 or Amazon S3 source. - - Parameters - ---------- - shortcut_name : str - location : str - subpath : str - source : str - connection_id: str - lakehouse : str - The Fabric lakehouse in which the shortcut will be created. - workspace : str, default=None - The name of the Fabric workspace in which the shortcut will be created. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - source_titles = { - 'adlsGen2': 'ADLS Gen2', - 'amazonS3': 'Amazon S3' - } - - sourceValues = list(source_titles.keys()) - - if source not in sourceValues: - print(f"{red_dot} The 'source' parameter must be one of these values: {sourceValues}.") - return - - sourceTitle = source_titles[source] - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - - client = fabric.FabricRestClient() - shortcutActualName = shortcut_name.replace(' ','') - - request_body = { - "path": 'Tables', - "name": shortcutActualName, - "target": { - source: { - "location": location, - "subpath": subpath, - "connectionId": connection_id} - } - } - - try: - response = client.post(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts",json=request_body) - if response.status_code == 201: - print(f"{green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'.") - else: - print(response.status_code) - except: - print(f"{red_dot} Failed to create a shortcut for the '{shortcut_name}' table.") - -def list_shortcuts(lakehouse: Optional[str] = None, workspace: Optional[str] = None): - - """ - Shows all shortcuts which exist in a Fabric lakehouse. - - Parameters - ---------- - lakehouse : str, default=None - The Fabric lakehouse name. - Defaults to None which resolves to the lakehouse attached to the notebook. - workspace : str, default=None - The name of the Fabric workspace in which lakehouse resides. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing all the shortcuts which exist in the specified lakehouse. 
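Both shortcut-creation helpers above POST to the same Fabric REST endpoint; only the `target` payload differs (`oneLake` versus `adlsGen2`/`amazonS3`). A rough sketch of the request for an ADLS Gen2 source, with placeholder GUIDs, paths, and shortcut name:

```python
import sempy.fabric as fabric

# Placeholder identifiers, for illustration only.
workspace_id = "<destination-workspace-guid>"
lakehouse_id = "<destination-lakehouse-guid>"

request_body = {
    "path": "Tables",
    "name": "SalesShortcut",
    "target": {
        "adlsGen2": {  # or "amazonS3"
            "location": "https://<storageaccount>.dfs.core.windows.net",
            "subpath": "<container>/<folder>",
            "connectionId": "<connection-guid>",
        }
    },
}

client = fabric.FabricRestClient()
response = client.post(
    f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts",
    json=request_body,
)
# The functions above treat status code 201 as success.
print(response.status_code)
```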
- """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - - df = pd.DataFrame(columns=['Shortcut Name', 'Shortcut Path', 'Source', 'Source Lakehouse Name', 'Source Workspace Name', 'Source Path', 'Source Connection ID', 'Source Location', 'Source SubPath']) - - client = fabric.FabricRestClient() - response = client.get(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts") - if response.status_code == 200: - for s in response.json()['value']: - shortcutName = s['name'] - shortcutPath = s['path'] - source = list(s['target'].keys())[0] - sourceLakehouseName, sourceWorkspaceName, sourcePath, connectionId, location, subpath = None, None, None, None, None, None - if source == 'oneLake': - sourceLakehouseId = s['target'][source]['itemId'] - sourcePath = s['target'][source]['path'] - sourceWorkspaceId = s['target'][source]['workspaceId'] - sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId) - sourceLakehouseName = resolve_lakehouse_name(sourceLakehouseId, sourceWorkspaceName) - else: - connectionId = s['target'][source]['connectionId'] - location = s['target'][source]['location'] - subpath = s['target'][source]['subpath'] - - new_data = {'Shortcut Name': shortcutName, 'Shortcut Path': shortcutPath, 'Source': source, 'Source Lakehouse Name': sourceLakehouseName, 'Source Workspace Name': sourceWorkspaceName, 'Source Path': sourcePath, 'Source Connection ID': connectionId, 'Source Location': location, 'Source SubPath': subpath} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - print(f"This function relies on an API which is not yet official as of May 21, 2024. Once the API becomes official this function will work as expected.") - return df - -def delete_shortcut(shortcut_name: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): - - """ - Deletes a shortcut. - - Parameters - ---------- - shortcut_name : str - The name of the shortcut. - lakehouse : str, default=None - The Fabric lakehouse name in which the shortcut resides. - Defaults to None which resolves to the lakehouse attached to the notebook. - workspace : str, default=None - The name of the Fabric workspace in which lakehouse resides. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - if lakehouse == None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - - client = fabric.FabricRestClient() - response = client.delete(f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}") - - if response.status_code == 200: - print(f"{green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted.") - else: - print(f"{red_dot} The '{shortcut_name}' has not been deleted.") \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 24a4a224..00000000 --- a/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -import setuptools - -with open("README.md", "r") as fh: - long_description = fh.read() - -setuptools.setup( - name="semantic-link-labs", - version="0.4.1", - author="Microsoft", - author_email="semanticdatascience@service.microsoft.com", - description="", - long_description=long_description, - long_description_content_type="text/markdown", - packages=setuptools.find_packages(), - install_requires=[ - 'semantic-link-sempy>=0.7.5', - 'anytree', - 'powerbiclient', - ], - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.8', -) \ No newline at end of file diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py new file mode 100644 index 00000000..9a6a5d86 --- /dev/null +++ b/src/sempy_labs/__init__.py @@ -0,0 +1,154 @@ +from sempy_labs._clear_cache import clear_cache + +# from sempy_labs._connections import ( +# create_connection_cloud, +# create_connection_vnet, +# create_connection_on_prem +# ) +from sempy_labs._dax import run_dax +from sempy_labs._generate_semantic_model import ( + create_blank_semantic_model, + create_semantic_model_from_bim, + # deploy_semantic_model, + get_semantic_model_bim, +) +from sempy_labs._list_functions import ( + get_object_level_security, + # list_annotations, + # list_columns, + list_dashboards, + list_dataflow_storage_accounts, + # list_datamarts, + # list_datapipelines, + # list_eventstreams, + # list_kpis, + # list_kqldatabases, + # list_kqlquerysets, + list_lakehouses, + # list_mirroredwarehouses, + # list_mlexperiments, + # list_mlmodels, + # list_relationships, + # list_sqlendpoints, + # list_tables, + list_warehouses, + # list_workspace_role_assignments, + create_warehouse, + update_item, +) + +from sempy_labs._helper_functions import ( + create_abfss_path, + format_dax_object_name, + create_relationship_name, + save_as_delta_table, + generate_embedded_filter, + get_direct_lake_sql_endpoint, + resolve_lakehouse_id, + resolve_lakehouse_name, + resolve_dataset_id, + resolve_dataset_name, + resolve_report_id, + resolve_report_name, + # language_validate +) +from sempy_labs._model_auto_build import model_auto_build +from sempy_labs._model_bpa import model_bpa_rules, run_model_bpa +from sempy_labs._model_dependencies import ( + measure_dependency_tree, + get_measure_dependencies, + get_model_calc_dependencies, +) +from sempy_labs._one_lake_integration import ( + export_model_to_onelake, +) + +# from sempy_labs._query_scale_out import ( +# qso_sync, +# qso_sync_status, +# 
set_qso, +# list_qso_settings, +# disable_qso, +# set_semantic_model_storage_format, +# set_workspace_default_storage_format, +# ) +from sempy_labs._refresh_semantic_model import ( + refresh_semantic_model, + cancel_dataset_refresh, +) +from sempy_labs._translations import translate_semantic_model +from sempy_labs._vertipaq import ( + vertipaq_analyzer, + # visualize_vertipaq, + import_vertipaq_analyzer, +) +from sempy_labs._tom import TOMWrapper, connect_semantic_model + +__all__ = [ + "clear_cache", + # create_connection_cloud, + # create_connection_vnet, + # create_connection_on_prem, + "run_dax", + "create_blank_semantic_model", + "create_semantic_model_from_bim", + #'deploy_semantic_model', + "get_semantic_model_bim", + "get_object_level_security", + #'list_annotations', + #'list_columns', + "list_dashboards", + "list_dataflow_storage_accounts", + #'list_datamarts', + #'list_datapipelines', + #'list_eventstreams', + #'list_kpis', + #'list_kqldatabases', + #'list_kqlquerysets', + "list_lakehouses", + #'list_mirroredwarehouses', + #'list_mlexperiments', + #'list_mlmodels', + #'list_relationships', + #'list_sqlendpoints', + #'list_tables', + "list_warehouses", + #'list_workspace_role_assignments', + "create_warehouse", + "update_item", + "create_abfss_path", + "format_dax_object_name", + "create_relationship_name", + "save_as_delta_table", + "generate_embedded_filter", + "get_direct_lake_sql_endpoint", + "resolve_lakehouse_id", + "resolve_lakehouse_name", + "resolve_dataset_id", + "resolve_dataset_name", + "resolve_report_id", + "resolve_report_name", + #'language_validate', + "model_auto_build", + "model_bpa_rules", + "run_model_bpa", + "measure_dependency_tree", + "get_measure_dependencies", + "get_model_calc_dependencies", + "export_model_to_onelake", + #'qso_sync', + #'qso_sync_status', + #'set_qso', + #'list_qso_settings', + #'disable_qso', + #'set_semantic_model_storage_format', + #'set_workspace_default_storage_format', + "refresh_semantic_model", + "cancel_dataset_refresh", + "translate_semantic_model", + "vertipaq_analyzer", + #'visualize_vertipaq', + "import_vertipaq_analyzer", + "TOMWrapper", + "connect_semantic_model", +] diff --git a/src/sempy_labs/_ai.py b/src/sempy_labs/_ai.py new file mode 100644 index 00000000..48638e27 --- /dev/null +++ b/src/sempy_labs/_ai.py @@ -0,0 +1,496 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from synapse.ml.services.openai import OpenAICompletion +from pyspark.sql.functions import col +from pyspark.sql import SparkSession +from typing import List, Optional, Union +from IPython.display import display + + +def optimize_semantic_model(dataset: str, workspace: Optional[str] = None): + + from ._model_bpa import run_model_bpa + from .directlake._fallback import check_fallback_reason + from ._helper_functions import format_dax_object_name + + modelBPA = run_model_bpa( + dataset=dataset, workspace=workspace, return_dataframe=True + ) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfC["Total Size"] = dfC["Total Size"].astype("int") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + + modelBPA_col = modelBPA[modelBPA["Object Type"] == "Column"] + modelBPA_col = pd.merge( + modelBPA_col, + dfC[["Column Object", "Total Size"]], + left_on="Object Name", + right_on="Column Object", + how="left", + ) + + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) + + if 
isDirectLake: + fallback = check_fallback_reason(dataset=dataset, workspace=workspace) + fallback_filt = fallback[fallback["FallbackReasonID"] == 2] + + if len(fallback_filt) > 0: + print( + f"The '{dataset}' semantic model is a Direct Lake semantic model which contains views. Since views always fall back to DirectQuery, it is recommended to only use lakehouse tables and not views." + ) + + # Potential model reduction estimate + ruleNames = [ + "Remove unnecessary columns", + "Set IsAvailableInMdx to false on non-attribute columns", + ] + + for rule in ruleNames: + df = modelBPA_col[modelBPA_col["Rule Name"] == rule] + df_filt = df[["Object Name", "Total Size"]].sort_values( + by="Total Size", ascending=False + ) + totSize = df["Total Size"].sum() + if len(df_filt) > 0: + print( + f"Potential savings of {totSize} bytes from following the '{rule}' rule." + ) + display(df_filt) + else: + print(f"The '{rule}' rule has been followed.") + + +def generate_measure_descriptions( + dataset: str, + measures: Union[str, List[str]], + gpt_model: Optional[str] = "gpt-35-turbo", + workspace: Optional[str] = None, +): + + service_name = "synapseml-openai" + + if isinstance(measures, str): + measures = [measures] + + validModels = ["gpt-35-turbo", "gpt-35-turbo-16k", "gpt-4"] + if gpt_model not in validModels: + print( + f"The '{gpt_model}' model is not a valid model. Enter a gpt_model from this list: {validModels}." + ) + return + + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + + if measures is not None: + dfM_filt = dfM[dfM["Measure Name"].isin(measures)] + else: + dfM_filt = dfM + + df = dfM_filt[["Table Name", "Measure Name", "Measure Expression"]] + + df["prompt"] = ( + f"The following is DAX code used by Microsoft Power BI. Please explain this code in simple terms:" + + df["Measure Expression"] + ) + + # Generate new column in df dataframe which has the AI-generated descriptions + completion = { + OpenAICompletion() + .setDeploymentName(gpt_model) + .setMaxTokens(200) + .setCustomServiceName(service_name) + .setPromptCol("prompt") + .setErrorCol("error") + .setOutputCol("completions") + } + + completed_df = completion.transform(df).cache() + completed_df.select( + col("prompt"), + col("error"), + col("completions.choices.text").getItem(0).alias("text"), + ) + + # Update the model to use the new descriptions + tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) + m = tom_server.Databases.GetByName(dataset).Model + + # for t in m.Tables: + # tName = t.Name + # for ms in t.Measures: + # mName = ms.Name + # mDesc = promptValue + + # m.SaveChanges() + + +def generate_aggs( + dataset: str, + table_name: str, + columns: Union[str, List[str]], + workspace: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + + from ._helper_functions import ( + get_direct_lake_sql_endpoint, + create_abfss_path, + format_dax_object_name, + resolve_lakehouse_id, + ) + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + import System + + # columns = { + #'SalesAmount': 'Sum', + #'ProductKey': 'GroupBy', + #'OrderDateKey': 'GroupBy' + # } + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if lakehouse_workspace == None: + lakehouse_workspace = workspace + lakehouse_workspace_id = workspace_id + else: + lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) + + if isinstance(columns, str): + columns = [columns] + 
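In practice the `columns` argument is consumed as a mapping of column name to aggregation type ('Sum', 'Count', 'Min', 'Max' or 'GroupBy'), and the SQL assembled further down follows directly from it. A small self-contained sketch of that query construction, with hypothetical column and table names:

```python
# Hypothetical aggregation spec; keys are column names, values are aggregation types.
columns = {
    "SalesAmount": "Sum",
    "OrderQuantity": "Sum",
    "ProductKey": "GroupBy",
    "OrderDateKey": "GroupBy",
}

# Mirrors the SQL construction below: aggregated columns become AGG(col) AS col,
# while 'GroupBy' columns appear in both the SELECT list and the GROUP BY clause.
select_cols, group_by_cols = [], []
for col, agg in columns.items():
    if agg == "GroupBy":
        select_cols.append(col)
        group_by_cols.append(col)
    else:
        select_cols.append(f"{agg.upper()}({col}) AS {col}")

sql_query = (
    "SELECT\n  " + ",\n  ".join(select_cols)
    + "\nFROM delta_table_factinternetsales"  # the temp view name is illustrative
    + "\nGROUP BY\n  " + ",\n  ".join(group_by_cols)
)
print(sql_query)
```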
+ columnValues = columns.keys() + + aggTypes = ["Sum", "Count", "Min", "Max", "GroupBy"] + aggTypesAggregate = ["Sum", "Count", "Min", "Max"] + numericTypes = ["Int64", "Double", "Decimal"] + + if any(value not in aggTypes for value in columns.values()): + print( + f"Invalid aggregation type(s) have been specified in the 'columns' parameter. Valid aggregation types: {aggTypes}." + ) + return + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode. This function is only relevant for Direct Lake semantic models." + ) + return + + dfC_filtT = dfC[dfC["Table Name"] == table_name] + + if len(dfC_filtT) == 0: + print( + f"The '{table_name}' table does not exist in the '{dataset}' semantic model within the '{workspace}' workspace." + ) + return + + dfC_filt = dfC[ + (dfC["Table Name"] == table_name) & (dfC["Column Name"].isin(columnValues)) + ] + + if len(columns) != len(dfC_filt): + print( + f"Columns listed in '{columnValues}' do not exist in the '{table_name}' table in the '{dataset}' semantic model within the '{workspace}' workspace." + ) + return + + # Check if doing sum/count/min/max etc. on a non-number column + for col, agg in columns.items(): + dfC_col = dfC_filt[dfC_filt["Column Name"] == col] + dataType = dfC_col["Data Type"].iloc[0] + if agg in aggTypesAggregate and dataType not in numericTypes: + print( + f"The '{col}' column in the '{table_name}' table is of '{dataType}' data type. Only columns of '{numericTypes}' data types can be aggregated as '{aggTypesAggregate}' aggregation types." + ) + return + + # Create/update lakehouse delta agg table + aggSuffix = "_agg" + aggTableName = f"{table_name}{aggSuffix}" + aggLakeTName = aggTableName.lower().replace(" ", "_") + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Table Name"] == table_name] + lakeTName = dfP_filt["Query"].iloc[0] + + sqlEndpointId = get_direct_lake_sql_endpoint(dataset=dataset, workspace=workspace) + + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] + + if len(dfI_filt) == 0: + print( + f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter." 
+ ) + return + + lakehouseName = dfI_filt["Display Name"].iloc[0] + lakehouse_id = resolve_lakehouse_id( + lakehouse=lakehouseName, workspace=lakehouse_workspace + ) + + # Generate SQL query + query = "SELECT" + groupBy = "\nGROUP BY" + for col, agg in columns.items(): + colFilt = dfC_filt[dfC_filt["Column Name"] == col] + sourceCol = colFilt["Source"].iloc[0] + + if agg == "GroupBy": + query = f"{query}\n{sourceCol}," + groupBy = f"{groupBy}\n{sourceCol}," + else: + query = f"{query}\n{agg}({sourceCol}) AS {sourceCol}," + + query = query[:-1] + + spark = SparkSession.builder.getOrCreate() + fromTablePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=lakeTName, + ) + df = spark.read.format("delta").load(fromTablePath) + tempTableName = "delta_table_" + lakeTName + df.createOrReplaceTempView(tempTableName) + sqlQuery = f"{query} \n FROM {tempTableName} {groupBy}" + + sqlQuery = sqlQuery[:-1] + print(sqlQuery) + + # Save query to spark dataframe + spark_df = spark.sql(sqlQuery) + f"\nCreating/updating the '{aggLakeTName}' table in the lakehouse..." + # Write spark dataframe to delta table + aggFilePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=aggLakeTName, + ) + spark_df.write.mode("overwrite").format("delta").save(aggFilePath) + f"The '{aggLakeTName}' table has been created/updated in the lakehouse." + + # Create/update semantic model agg table + tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) + m = tom_server.Databases.GetByName(dataset).Model + f"\nUpdating the '{dataset}' semantic model..." + dfC_agg = dfC[dfC["Table Name"] == aggTableName] + + if len(dfC_agg) == 0: + print(f"Creating the '{aggTableName}' table...") + exp = m.Expressions["DatabaseQuery"] + tbl = TOM.Table() + tbl.Name = aggTableName + tbl.IsHidden = True + + ep = TOM.EntityPartitionSource() + ep.Name = aggTableName + ep.EntityName = aggLakeTName + ep.ExpressionSource = exp + + part = TOM.Partition() + part.Name = aggTableName + part.Source = ep + part.Mode = TOM.ModeType.DirectLake + + tbl.Partitions.Add(part) + + for i, r in dfC_filt.iterrows(): + scName = r["Source"] + cName = r["Column Name"] + dType = r["Data Type"] + + col = TOM.DataColumn() + col.Name = cName + col.IsHidden = True + col.SourceColumn = scName + col.DataType = System.Enum.Parse(TOM.DataType, dType) + + tbl.Columns.Add(col) + print( + f"The '{aggTableName}'[{cName}] column has been added to the '{dataset}' semantic model." + ) + + m.Tables.Add(tbl) + print( + f"The '{aggTableName}' table has been added to the '{dataset}' semantic model." 
+ ) + else: + print(f"Updating the '{aggTableName}' table's columns...") + # Remove existing columns + for t in m.Tables: + tName = t.Name + for c in t.Columns: + cName = c.Name + if t.Name == aggTableName: + m.Tables[tName].Columns.Remove(cName) + # Add columns + for i, r in dfC_filt.iterrows(): + scName = r["Source"] + cName = r["Column Name"] + dType = r["Data Type"] + + col = TOM.DataColumn() + col.Name = cName + col.IsHidden = True + col.SourceColumn = scName + col.DataType = System.Enum.Parse(TOM.DataType, dType) + + m.Tables[aggTableName].Columns.Add(col) + print(f"The '{aggTableName}'[{cName}] column has been added.") + + # Create relationships + relMap = {"m": "Many", "1": "One", "0": "None"} + + print(f"\nGenerating necessary relationships...") + for i, r in dfR.iterrows(): + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + cfb = r["Cross Filtering Behavior"] + sfb = r["Security Filtering Behavior"] + mult = r["Multiplicity"] + + crossFB = System.Enum.Parse(TOM.CrossFilteringBehavior, cfb) + secFB = System.Enum.Parse(TOM.SecurityFilteringBehavior, sfb) + fromCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, relMap.get(mult[0]) + ) + toCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, relMap.get(mult[-1]) + ) + + rel = TOM.SingleColumnRelationship() + rel.FromCardinality = fromCardinality + rel.ToCardinality = toCardinality + rel.IsActive = r["Active"] + rel.CrossFilteringBehavior = crossFB + rel.SecurityFilteringBehavior = secFB + rel.RelyOnReferentialIntegrity = r["Rely On Referential Integrity"] + + if fromTable == table_name: + try: + rel.FromColumn = m.Tables[aggTableName].Columns[fromColumn] + m.Relationships.Add(rel) + print( + f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has been added." + ) + except: + print( + f"'{aggTableName}'[{fromColumn}] -> '{toTable}'[{toColumn}] relationship has not been created." + ) + elif toTable == table_name: + try: + rel.ToColumn = m.Tables[aggTableName].Columns[toColumn] + m.Relationships.Add(rel) + print( + f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has been added." + ) + except: + print( + f"'{fromTable}'[{fromColumn}] -> '{aggTableName}'[{toColumn}] relationship has not been created." + ) + f"Relationship creation is complete." + + # Create IF measure + f"\nCreating measure to check if the agg table can be used..." + aggChecker = "IF(" + dfR_filt = dfR[ + (dfR["From Table"] == table_name) & (~dfR["From Column"].isin(columnValues)) + ] + + for i, r in dfR_filt.iterrows(): + toTable = r["To Table"] + aggChecker = f"{aggChecker}\nISCROSSFILTERED('{toTable}') ||" + + aggChecker = aggChecker[:-3] + aggChecker = f"{aggChecker},1,0)" + print(aggChecker) + + # Todo: add IFISFILTERED clause for columns + f"\n Creating the base measures in the agg table..." 
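To make the agg-checker construction above concrete, this is what the loop yields for two hypothetical dimension tables that are related to the base table but not covered by the aggregation columns; the resulting expression returns 1 when any of them cross-filters the query and 0 otherwise:

```python
# Hypothetical 'to' tables from relationships whose 'from' column is not in the agg spec.
unaggregated_dims = ["Customer", "Promotion"]

agg_checker = "IF("
for to_table in unaggregated_dims:
    agg_checker = f"{agg_checker}\nISCROSSFILTERED('{to_table}') ||"
agg_checker = agg_checker[:-3]      # drop the trailing " ||"
agg_checker = f"{agg_checker},1,0)"
print(agg_checker)
# IF(
# ISCROSSFILTERED('Customer') ||
# ISCROSSFILTERED('Promotion'),1,0)
```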
+ # Create base agg measures + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" + SELECT + [TABLE] AS [Table Name] + ,[OBJECT] AS [Object Name] + ,[OBJECT_TYPE] AS [Object Type] + ,[REFERENCED_TABLE] AS [Referenced Table] + ,[REFERENCED_OBJECT] AS [Referenced Object] + ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] + FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY + WHERE [OBJECT_TYPE] = 'MEASURE' + """, + ) + + baseMeasures = dep[ + (dep["Referenced Object Type"] == "COLUMN") + & (dep["Referenced Table"] == table_name) + & (dep["Referenced Object"].isin(columnValues)) + ] + for i, r in baseMeasures.iterrows(): + tName = r["Table Name"] + mName = r["Object Name"] + cName = r["Referenced Object"] + dfM_filt = dfM[dfM["Measure Name"] == mName] + expr = dfM_filt["Measure Expression"].iloc[0] + + colFQNonAgg = format_dax_object_name(tName, cName) + colFQAgg = format_dax_object_name(aggTableName, cName) + colNQNonAgg = f"{tName}[{cName}]" + + if " " in tName: + newExpr = expr.replace(colFQNonAgg, colFQAgg) + else: + newExpr = expr.replace(colFQNonAgg, colFQAgg).replace(colNQNonAgg, colFQAgg) + print(expr) + print(newExpr) + + aggMName = mName + aggSuffix + measure = TOM.Measure() + measure.Name = aggMName + measure.IsHidden = True + measure.Expression = newExpr + m.Tables[aggTableName].Measures.Add(measure) + f"The '{aggMName}' measure has been created in the '{aggTableName}' table." + + # Update base detail measures + + # m.SaveChanges() + + +# Identify views used within Direct Lake model +# workspace = 'MK Demo 6' +# lakehouse = 'MyLakehouse' +# dataset = 'MigrationTest' +# lakehouse_workspace = workspace + +# dfView = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'View Name']) +# dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) +# isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) + +# spark = SparkSession.builder.getOrCreate() +# views = spark.sql(f"SHOW VIEWS IN {lakehouse}").collect() +# for view in views: +# viewName = view['viewName'] +# isTemporary = view['isTemporary'] +# new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'View Name': viewName} +# dfView = pd.concat([dfView, pd.DataFrame(new_data, index=[0])], ignore_index=True) +# dfView +# lakeT = get_lakehouse_tables(lakehouse, lakehouse_workspace) +# if not dfP['Query'].isin(lakeT['Table Name'].values): +# if diff --git a/sempy_labs/ClearCache.py b/src/sempy_labs/_clear_cache.py similarity index 66% rename from sempy_labs/ClearCache.py rename to src/sempy_labs/_clear_cache.py index 1b009444..cab8d8db 100644 --- a/sempy_labs/ClearCache.py +++ b/src/sempy_labs/_clear_cache.py @@ -1,15 +1,11 @@ import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_dataset_id +from ._helper_functions import resolve_dataset_id from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' def clear_cache(dataset: str, workspace: Optional[str] = None): - """ Clears the cache of a semantic model. @@ -21,17 +17,13 @@ def clear_cache(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - datasetID = resolve_dataset_id(dataset = dataset, workspace = workspace) + datasetID = resolve_dataset_id(dataset=dataset, workspace=workspace) xmla = f""" @@ -40,8 +32,8 @@ def clear_cache(dataset: str, workspace: Optional[str] = None): """ - fabric.execute_xmla(dataset = dataset,xmla_command=xmla, workspace = workspace) + fabric.execute_xmla(dataset=dataset, xmla_command=xmla, workspace=workspace) + + outputtext = f"{icons.green_dot} Cache cleared for the '{dataset}' semantic model within the '{workspace}' workspace." - outputtext = f"{green_dot} Cache cleared for the '{dataset}' semantic model within the '{workspace}' workspace." - - return outputtext \ No newline at end of file + return outputtext diff --git a/src/sempy_labs/_connections.py b/src/sempy_labs/_connections.py new file mode 100644 index 00000000..de310ee6 --- /dev/null +++ b/src/sempy_labs/_connections.py @@ -0,0 +1,234 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from typing import List, Optional, Union + + +def create_connection_cloud( + name: str, + server_name: str, + database_name: str, + user_name: str, + password: str, + privacy_level: str, +): + + # https://review.learn.microsoft.com/en-us/rest/api/fabric/core/connections/create-connection?branch=features%2Fdmts&tabs=HTTP + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test Connection", + ] + ) + + client = fabric.FabricRestClient() + + request_body = { + "connectivityType": "ShareableCloud", + "name": name, + "connectionDetails": { + "type": "SQL", + "parameters": [ + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], + }, + "privacyLevel": privacy_level, + "credentialDetails": { + "singleSignOnType": "None", + "connectionEncryption": "NotEncrypted", + "skipTestConnection": False, + "credentials": { + "credentialType": "Basic", + "username": user_name, + "password": password, + }, + }, + } + + response = client.post(f"/v1/connections", json=request_body) + + if response.status_code == 200: + o = response.json() + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) + + return df + else: + print(response.status_code) + + +def create_connection_on_prem( + name: str, + gateway_id: str, + server_name: str, + database_name: str, + credentials: str, + privacy_level: str, +): + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Gateway ID", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test 
Connection", + ] + ) + + client = fabric.FabricRestClient() + + request_body = { + "connectivityType": "OnPremisesDataGateway", + "gatewayId": gateway_id, + "name": name, + "connectionDetails": { + "type": "SQL", + "parameters": [ + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], + }, + "privacyLevel": privacy_level, + "credentialDetails": { + "singleSignOnType": "None", + "connectionEncryption": "NotEncrypted", + "skipTestConnection": False, + "credentials": { + "credentialType": "Windows", + "values": [{"gatewayId": gateway_id, "credentials": credentials}], + }, + }, + } + + response = client.post(f"/v1/connections", json=request_body) + + if response.status_code == 200: + o = response.json() + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Gateway ID": o["gatewayId"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) + + return df + else: + print(response.status_code) + + +def create_connection_vnet( + name: str, + gateway_id: str, + server_name: str, + database_name: str, + user_name: str, + password: str, + privacy_level: str, +): + + df = pd.DataFrame( + columns=[ + "Connection ID", + "Connection Name", + "Gateway ID", + "Connectivity Type", + "Connection Type", + "Connection Path", + "Privacy Level", + "Credential Type", + "Single Sign On Type", + "Connection Encryption", + "Skip Test Connection", + ] + ) + + client = fabric.FabricRestClient() + + request_body = { + "connectivityType": "VirtualNetworkDataGateway", + "gatewayId": gateway_id, + "name": name, + "connectionDetails": { + "type": "SQL", + "parameters": [ + {"name": "server", "value": server_name}, + {"name": "database", "value": database_name}, + ], + }, + "privacyLevel": privacy_level, + "credentialDetails": { + "singleSignOnType": "None", + "connectionEncryption": "Encrypted", + "skipTestConnection": False, + "credentials": { + "credentialType": "Basic", + "username": user_name, + "password": password, + }, + }, + } + + response = client.post(f"/v1/connections", json=request_body) + + if response.status_code == 200: + o = response.json() + new_data = { + "Connection Id": o["id"], + "Connection Name": o["name"], + "Gateway ID": o["gatewayId"], + "Connectivity Type": o["connectivityType"], + "Connection Type": o["connectionDetails"]["type"], + "Connection Path": o["connectionDetails"]["path"], + "Privacy Level": o["privacyLevel"], + "Credential Type": o["credentialDetails"]["credentialType"], + "Single Sign On Type": o["credentialDetails"]["singleSignOnType"], + "Connection Encryption": o["credentialDetails"]["connectionEncryption"], + "Skip Test Connection": o["credentialDetails"]["skipTestConnection"], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["Skip Test Connection"] = df["Skip Test Connection"].astype(bool) + + return df + else: + print(response.status_code) diff --git a/src/sempy_labs/_dax.py b/src/sempy_labs/_dax.py new file mode 
100644 index 00000000..75b29f94 --- /dev/null +++ b/src/sempy_labs/_dax.py @@ -0,0 +1,70 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import resolve_dataset_id +from typing import List, Optional, Union +from sempy._utils._log import log + + +@log +def run_dax( + dataset: str, + dax_query: str, + user_name: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Runs a DAX query against a semantic model using the `REST API `_. + + Compared to evaluate_dax this allows passing the user name for impersonation. + Note that the REST API has significant limitations compared to the XMLA endpoint. + + Parameters + ---------- + dataset : str + Name of the semantic model. + dax_query : str + The DAX query. + user_name : str + The user name (i.e. hello@goodbye.com). + Defaults to None which resolves to no user impersonation. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe holding the result of the DAX query. + """ + + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries-in-group + + if workspace is None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + dataset_id = resolve_dataset_id(dataset=dataset, workspace=workspace) + + if user_name is None: + request_body = {"queries": [{"query": dax_query}]} + else: + request_body = { + "queries": [{"query": dax_query}], + "impersonatedUserName": user_name, + } + + client = fabric.PowerBIRestClient() + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/executeQueries", + json=request_body, + ) + data = response.json()["results"][0]["tables"] + column_names = data[0]["rows"][0].keys() + data_rows = [row.values() for item in data for row in item["rows"]] + df = pd.DataFrame(data_rows, columns=column_names) + + return df diff --git a/src/sempy_labs/_generate_semantic_model.py b/src/sempy_labs/_generate_semantic_model.py new file mode 100644 index 00000000..c7b79d00 --- /dev/null +++ b/src/sempy_labs/_generate_semantic_model.py @@ -0,0 +1,280 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import json, base64, time, os +from typing import List, Optional, Union +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +import sempy_labs._icons as icons + + +def create_blank_semantic_model( + dataset: str, + compatibility_level: int = 1605, + workspace: Optional[str] = None, +): + """ + Creates a new blank semantic model (no tables/columns etc.). + + Parameters + ---------- + dataset : str + Name of the semantic model. + compatibility_level : int + The compatibility level of the semantic model. + Defaults to 1605. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
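As a quick illustration of how the `run_dax` function above might be called from a Fabric notebook, here is a minimal sketch. The model name and DAX query are hypothetical, and it assumes the function is re-exported at the package level like the library's other functions (otherwise import it from `sempy_labs._dax`).

```python
import sempy_labs as labs  # assumption: run_dax is re-exported at the package level

# Hypothetical semantic model and DAX query; user_name enables impersonation via the REST API.
df = labs.run_dax(
    dataset="AdventureWorks",
    dax_query="EVALUATE VALUES('Date'[CalendarYear])",
    user_name=None,   # e.g. "hello@goodbye.com" to impersonate a user
    workspace=None,   # None resolves to the workspace of the attached lakehouse or notebook
)
df.head()
```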
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if compatibility_level < 1500: + print(f"{icons.red_dot} Compatiblity level must be at least 1500.") + return + + tmsl = f""" + {{ + "createOrReplace": {{ + "object": {{ + "database": '{dataset}' + }}, + "database": {{ + "name": '{dataset}', + "compatibilityLevel": {compatibility_level}, + "model": {{ + "culture": "en-US", + "defaultPowerBIDataSourceVersion": "powerBI_V3" + }} + }} + }} + }} + """ + + fabric.execute_tmsl(script=tmsl, workspace=workspace) + + return print( + f"{icons.green_dot} The '{dataset}' semantic model was created within the '{workspace}' workspace." + ) + + +def create_semantic_model_from_bim( + dataset: str, bim_file: str, workspace: Optional[str] = None +): + """ + Creates a new semantic model based on a Model.bim file. + + Parameters + ---------- + dataset : str + Name of the semantic model. + bim_file : str + The model.bim file. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + objectType = "SemanticModel" + + dfI = fabric.list_items(workspace=workspace, type=objectType) + dfI_filt = dfI[(dfI["Display Name"] == dataset)] + + if len(dfI_filt) > 0: + print( + f"WARNING: '{dataset}' already exists as a semantic model in the '{workspace}' workspace." + ) + return + + client = fabric.FabricRestClient() + defPBIDataset = {"version": "1.0", "settings": {}} + + def conv_b64(file): + + loadJson = json.dumps(file) + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + + return f + + payloadPBIDefinition = conv_b64(defPBIDataset) + payloadBim = conv_b64(bim_file) + + request_body = { + "displayName": dataset, + "type": objectType, + "definition": { + "parts": [ + { + "path": "model.bim", + "payload": payloadBim, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbidataset", + "payload": payloadPBIDefinition, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post(f"/v1/workspaces/{workspace_id}/items", json=request_body) + + if response.status_code == 201: + print( + f"The '{dataset}' semantic model has been created within the '{workspace}' workspace." + ) + print(response.json()) + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + print( + f"The '{dataset}' semantic model has been created within the '{workspace}' workspace." + ) + print(response.json()) + + +def deploy_semantic_model( + dataset: str, + new_dataset: Optional[str] = None, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Deploys a semantic model based on an existing semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model to deploy. + new_dataset: str + Name of the new semantic model to be created. + workspace : str, default=None + The Fabric workspace name. 
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str, default=None + The Fabric workspace name in which the new semantic model will be deployed. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if new_dataset is None: + new_dataset = dataset + + if new_dataset == dataset and new_dataset_workspace == workspace: + print( + f"The 'dataset' and 'new_dataset' parameters have the same value. And, the 'workspace' and 'new_dataset_workspace' parameters have the same value. At least one of these must be different. Please update the parameters." + ) + return + + bim = get_semantic_model_bim(dataset=dataset, workspace=workspace) + + create_semantic_model_from_bim( + dataset=new_dataset, bim_file=bim, workspace=new_dataset_workspace + ) + + +def get_semantic_model_bim( + dataset: str, + workspace: Optional[str] = None, + save_to_file_name: Optional[str] = None, +): + """ + Extracts the Model.bim file for a given semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + save_to_file_name : str, default=None + If specified, saves the Model.bim as a file in the lakehouse attached to the notebook. + + Returns + ------- + str + The Model.bim file for the semantic model. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + objType = "SemanticModel" + client = fabric.FabricRestClient() + itemList = fabric.list_items(workspace=workspace, type=objType) + itemListFilt = itemList[(itemList["Display Name"] == dataset)] + itemId = itemListFilt["Id"].iloc[0] + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" + ) + + if response.status_code == 200: + res = response.json() + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + res = response.json() + df_items = pd.json_normalize(res["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "model.bim"] + payload = df_items_filt["payload"].iloc[0] + bimFile = base64.b64decode(payload).decode("utf-8") + bimJson = json.loads(bimFile) + + if save_to_file_name is not None: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
+ ) + return + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + folderPath = "/lakehouse/default/Files" + fileExt = ".bim" + if not save_to_file_name.endswith(fileExt): + save_to_file_name = save_to_file_name + fileExt + filePath = os.path.join(folderPath, save_to_file_name) + with open(filePath, "w") as json_file: + json.dump(bimJson, json_file, indent=4) + print( + f"The .bim file for the '{dataset}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + ) + + return bimJson diff --git a/sempy_labs/HelperFunctions.py b/src/sempy_labs/_helper_functions.py similarity index 60% rename from sempy_labs/HelperFunctions.py rename to src/sempy_labs/_helper_functions.py index d6a8ebf1..e1c13edb 100644 --- a/sempy_labs/HelperFunctions.py +++ b/src/sempy_labs/_helper_functions.py @@ -1,17 +1,15 @@ -import sempy import sempy.fabric as fabric import re +import pandas as pd from pyspark.sql import SparkSession -from typing import List, Optional, Union +from typing import Optional, Tuple from uuid import UUID +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' - -def create_abfss_path(lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str): +def create_abfss_path( + lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: str +): """ Creates an abfss path for a delta table in a Fabric lakehouse. @@ -32,16 +30,16 @@ def create_abfss_path(lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_ta return f"abfss://{lakehouse_workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}/Tables/{delta_table_name}" -def format_dax_object_name(a: str,b: str): +def format_dax_object_name(table: str, column: str): """ Formats a table/column combination to the 'Table Name'[Column Name] format. Parameters ---------- - a : str + table : str The name of the table. - b : str + column : str The name of the column. Returns @@ -49,11 +47,13 @@ def format_dax_object_name(a: str,b: str): str The fully qualified object name. """ - - return "'" + a + "'[" + b + "]" -def create_relationship_name(from_table: str, from_column: str, to_table: str, to_column: str): + return "'" + table + "'[" + column + "]" + +def create_relationship_name( + from_table: str, from_column: str, to_table: str, to_column: str +): """ Formats a relationship's table/columns into a fully qualified name. @@ -71,13 +71,17 @@ def create_relationship_name(from_table: str, from_column: str, to_table: str, t Returns ------- str - The fully qualified relationship name. + The fully qualified relationship name. """ - return format_dax_object_name(from_table, from_column) + ' -> ' + format_dax_object_name(to_table, to_column) + return ( + format_dax_object_name(from_table, from_column) + + " -> " + + format_dax_object_name(to_table, to_column) + ) -def resolve_report_id(report: str, workspace: Optional[str] = None): +def resolve_report_id(report: str, workspace: Optional[str] = None): """ Obtains the ID of the Power BI report. @@ -95,22 +99,17 @@ def resolve_report_id(report: str, workspace: Optional[str] = None): UUID The ID of the Power BI report. 
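The string helpers above are easy to sanity-check in isolation. The sketch below imports them from the private module path shown in this diff and uses made-up table and column names to show the fully qualified names they produce.

```python
from sempy_labs._helper_functions import (
    format_dax_object_name,
    create_relationship_name,
)

# Prints 'Sales'[OrderDateKey]
print(format_dax_object_name("Sales", "OrderDateKey"))

# Prints 'Sales'[OrderDateKey] -> 'Date'[DateKey]
print(create_relationship_name("Sales", "OrderDateKey", "Date", "DateKey"))
```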
""" - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = report, type = 'Report', workspace = workspace) - - #objectType = 'Report' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == report)] - #obj = dfI_filt['Id'].iloc[0] + obj = fabric.resolve_item_id(item_name=report, type="Report", workspace=workspace) return obj -def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): +def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the Power BI report. @@ -128,23 +127,19 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str] = None): str The name of the Power BI report. """ - - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = report_id, type = 'Report', workspace = workspace) - - #objectType = 'Report' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == report_id)] - #obj = dfI_filt['Display Name'].iloc[0] + obj = fabric.resolve_item_name( + item_id=report_id, type="Report", workspace=workspace + ) return obj -def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): +def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): """ Obtains the ID of the semantic model. @@ -162,22 +157,19 @@ def resolve_dataset_id(dataset: str, workspace: Optional[str] = None): UUID The ID of the semantic model. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = dataset, type = 'SemanticModel', workspace = workspace) - - #objectType = 'SemanticModel' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == dataset)] - #obj = dfI_filt['Id'].iloc[0] + obj = fabric.resolve_item_id( + item_name=dataset, type="SemanticModel", workspace=workspace + ) return obj -def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): +def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the semantic model. @@ -195,22 +187,19 @@ def resolve_dataset_name(dataset_id: UUID, workspace: Optional[str] = None): str The name of the semantic model. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = dataset_id, type = 'SemanticModel', workspace = workspace) - - #objectType = 'SemanticModel' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == dataset_id)] - #obj = dfI_filt['Display Name'].iloc[0] + obj = fabric.resolve_item_name( + item_id=dataset_id, type="SemanticModel", workspace=workspace + ) return obj -def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): +def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): """ Obtains the name of the Fabric lakehouse. @@ -228,27 +217,19 @@ def resolve_lakehouse_name(lakehouse_id: UUID, workspace: Optional[str] = None): str The name of the Fabric lakehouse. 
""" - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_name(item_id = lakehouse_id, type = 'Lakehouse', workspace = workspace) - - #objectType = 'Lakehouse' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Id'] == lakehouse_id)] - - #if len(dfI_filt) == 0: - # print(f"The '{lakehouse_id}' Lakehouse Id does not exist within the '{workspace}' workspace.") - # return - - #obj = dfI_filt['Display Name'].iloc[0] + obj = fabric.resolve_item_name( + item_id=lakehouse_id, type="Lakehouse", workspace=workspace + ) return obj -def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): +def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): """ Obtains the ID of the Fabric lakehouse. @@ -266,27 +247,19 @@ def resolve_lakehouse_id(lakehouse: str, workspace: Optional[str] = None): UUID The ID of the Fabric lakehouse. """ - + if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - obj = fabric.resolve_item_id(item_name = lakehouse, type = 'Lakehouse', workspace = workspace) - - #objectType = 'Lakehouse' - #dfI = fabric.list_items(workspace = workspace, type = objectType) - #dfI_filt = dfI[(dfI['Display Name'] == lakehouse)] - - #if len(dfI_filt) == 0: - # print(f"The '{lakehouse}' lakehouse does not exist within the '{workspace}' workspace.") - # return - - #obj = dfI_filt['Id'].iloc[0] + obj = fabric.resolve_item_id( + item_name=lakehouse, type="Lakehouse", workspace=workspace + ) return obj -def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): +def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None) -> UUID: """ Obtains the SQL Endpoint ID of the semantic model. @@ -301,7 +274,7 @@ def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): Returns ------- - UUID + uuid.UUID The ID of SQL Endpoint. """ @@ -309,24 +282,26 @@ def get_direct_lake_sql_endpoint(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode.") + print( + f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode." + ) return - - dfE = fabric.list_expressions(dataset = dataset, workspace = workspace) - dfE_filt = dfE[dfE['Name']== 'DatabaseQuery'] - expr = dfE_filt['Expression'].iloc[0] + + dfE = fabric.list_expressions(dataset=dataset, workspace=workspace) + dfE_filt = dfE[dfE["Name"] == "DatabaseQuery"] + expr = dfE_filt["Expression"].iloc[0] matches = re.findall(r'"([^"]*)"', expr) sqlEndpointId = matches[1] - + return sqlEndpointId -def generate_embedded_filter(filter: str): +def generate_embedded_filter(filter: str): """ Converts the filter expression to a filter expression which can be used by a Power BI embedded URL. 
@@ -344,27 +319,60 @@ def generate_embedded_filter(filter: str): pattern = r"'[^']+'\[[^\[]+\]" matches = re.findall(pattern, filter) for match in matches: - matchReplace = match.replace("'",'').replace('[','/').replace(']','')\ - .replace(' ','_x0020_').replace('@','_00x40_').replace('+','_0x2B_').replace('{','_007B_').replace('}','_007D_') + matchReplace = ( + match.replace("'", "") + .replace("[", "/") + .replace("]", "") + .replace(" ", "_x0020_") + .replace("@", "_00x40_") + .replace("+", "_0x2B_") + .replace("{", "_007B_") + .replace("}", "_007D_") + ) filter = filter.replace(match, matchReplace) - + pattern = r"\[[^\[]+\]" matches = re.findall(pattern, filter) for match in matches: - matchReplace = match.replace("'",'').replace('[','/').replace(']','')\ - .replace(' ','_x0020_').replace('@','_00x40_').replace('+','_0x2B_').replace('{','_007B_').replace('}','_007D_') + matchReplace = ( + match.replace("'", "") + .replace("[", "/") + .replace("]", "") + .replace(" ", "_x0020_") + .replace("@", "_00x40_") + .replace("+", "_0x2B_") + .replace("{", "_007B_") + .replace("}", "_007D_") + ) filter = filter.replace(match, matchReplace) - revised_filter = filter.replace('<=','le').replace('>=','ge').replace('<>','ne').replace('!=','ne')\ - .replace('==','eq').replace('=','eq').replace('<','lt').replace('>','gt')\ - .replace(' && ',' and ').replace(' & ',' and ')\ - .replace(' || ',' or ').replace(' | ',' or ')\ - .replace('{','(').replace('}',')') - + revised_filter = ( + filter.replace("<=", "le") + .replace(">=", "ge") + .replace("<>", "ne") + .replace("!=", "ne") + .replace("==", "eq") + .replace("=", "eq") + .replace("<", "lt") + .replace(">", "gt") + .replace(" && ", " and ") + .replace(" & ", " and ") + .replace(" || ", " or ") + .replace(" | ", " or ") + .replace("{", "(") + .replace("}", ")") + ) + return revised_filter -def save_as_delta_table(dataframe, delta_table_name: str, write_mode: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def save_as_delta_table( + dataframe, + delta_table_name: str, + write_mode: str, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): """ Saves a pandas dataframe as a delta table in a Fabric lakehouse. @@ -398,26 +406,101 @@ def save_as_delta_table(dataframe, delta_table_name: str, write_mode: str, lakeh if lakehouse is None: lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id=lakehouse_id, workspace=workspace) + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) else: lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - writeModes = ['append', 'overwrite'] + writeModes = ["append", "overwrite"] write_mode = write_mode.lower() if write_mode not in writeModes: - print(f"{red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {writeModes}.") + print( + f"{icons.red_dot} Invalid 'write_type' parameter. Choose from one of the following values: {writeModes}." + ) return - if ' ' in delta_table_name: - print(f"{red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names.") + if " " in delta_table_name: + print( + f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names." 
+ ) return - - dataframe.columns = dataframe.columns.str.replace(' ', '_') + + dataframe.columns = dataframe.columns.str.replace(" ", "_") spark = SparkSession.builder.getOrCreate() spark_df = spark.createDataFrame(dataframe) - filePath = create_abfss_path(lakehouse_id = lakehouse_id, lakehouse_workspace_id = workspace_id, delta_table_name = delta_table_name) - spark_df.write.mode(write_mode).format('delta').save(filePath) - print(f"{green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace.") \ No newline at end of file + filePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=workspace_id, + delta_table_name=delta_table_name, + ) + spark_df.write.mode(write_mode).format("delta").save(filePath) + print( + f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse}' lakehouse within the '{workspace}' workspace." + ) + + +def language_validate(language: str): + """ + Validates that the specified language exists within the supported languages. + + Parameters + ---------- + language : str + The language code. + + Returns + ------- + str + The full name of the language if the language code is supported. + """ + + url = "https://learn.microsoft.com/azure/ai-services/translator/language-support" + + tables = pd.read_html(url) + df = tables[0] + + df_filt = df[df["Language code"] == language] + + df_filt2 = df[df["Language"] == language.capitalize()] + + if len(df_filt) == 1: + lang = df_filt["Language"].iloc[0] + elif len(df_filt2) == 1: + lang = df_filt2["Language"].iloc[0] + else: + print( + f"The '{language}' language is not a valid language code. Please refer to this link for a list of valid language codes: {url}." + ) + return + + return lang + + +def resolve_workspace_name_and_id(workspace: Optional[str] = None) -> Tuple[str, str]: + """ + Obtains the name and ID of the Fabric workspace. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str, str + The name and ID of the Fabric workspace.
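For context, a hedged usage sketch for `save_as_delta_table`; the dataframe and table name are made up, and the import assumes the helper is re-exported at the package level (otherwise import it from `sempy_labs._helper_functions`).

```python
import pandas as pd
import sempy_labs as labs  # assumption: save_as_delta_table is exposed at package level

sales = pd.DataFrame({"Product": ["A", "B"], "Sales Amount": [10, 20]})

# write_mode must be 'append' or 'overwrite'; spaces in column names are replaced
# with underscores, and the delta table name itself may not contain spaces.
labs.save_as_delta_table(
    dataframe=sales,
    delta_table_name="sales_summary",
    write_mode="overwrite",
    lakehouse=None,   # None resolves to the lakehouse attached to the notebook
    workspace=None,
)
```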
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspace_id = fabric.resolve_workspace_id(workspace) + + return workspace, workspace_id diff --git a/src/sempy_labs/_icons.py b/src/sempy_labs/_icons.py new file mode 100644 index 00000000..2547eb5f --- /dev/null +++ b/src/sempy_labs/_icons.py @@ -0,0 +1,4 @@ +green_dot = "\U0001F7E2" +yellow_dot = "\U0001F7E1" +red_dot = "\U0001F534" +in_progress = "⌛" diff --git a/sempy_labs/ListFunctions.py b/src/sempy_labs/_list_functions.py similarity index 50% rename from sempy_labs/ListFunctions.py rename to src/sempy_labs/_list_functions.py index 6e63a69a..941dd7e4 100644 --- a/sempy_labs/ListFunctions.py +++ b/src/sempy_labs/_list_functions.py @@ -1,13 +1,12 @@ -import sempy import sempy.fabric as fabric +from sempy_labs._helper_functions import resolve_workspace_name_and_id import pandas as pd import json, time from pyspark.sql import SparkSession -from .GetDirectLakeLakehouse import get_direct_lake_lakehouse -from typing import List, Optional, Union +from typing import Optional -def get_object_level_security(dataset: str, workspace: Optional[str] = None): +def get_object_level_security(dataset: str, workspace: Optional[str] = None): """ Shows the object level security for the semantic model. @@ -19,7 +18,7 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame @@ -33,7 +32,7 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Role Name', 'Object Type', 'Table Name', 'Object Name']) + df = pd.DataFrame(columns=["Role Name", "Object Type", "Table Name", "Object Name"]) for r in m.Roles: for tp in r.TablePermissions: @@ -41,18 +40,32 @@ def get_object_level_security(dataset: str, workspace: Optional[str] = None): columnCount = len(tp.ColumnPermissions) objectType = "Table" if columnCount == 0: - new_data = {'Role Name': r.Name, 'Object Type': objectType, 'Table Name': tp.Name, 'Object Name': tp.Name} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Role Name": r.Name, + "Object Type": objectType, + "Table Name": tp.Name, + "Object Name": tp.Name, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) else: objectType = "Column" for cp in tp.ColumnPermissions: - new_data = {'Role Name': r.Name, 'Object Type': objectType, 'Table Name': tp.Name, 'Object Name': cp.Name} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Role Name": r.Name, + "Object Type": objectType, + "Table Name": tp.Name, + "Object Name": cp.Name, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df -def list_tables(dataset: str, workspace: Optional[str] = None): +def list_tables(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's tables and their properties. @@ -64,7 +77,7 @@ def list_tables(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame @@ -78,7 +91,17 @@ def list_tables(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Name', 'Type', 'Hidden', 'Data Category', 'Description', 'Refresh Policy', 'Source Expression']) + df = pd.DataFrame( + columns=[ + "Name", + "Type", + "Hidden", + "Data Category", + "Description", + "Refresh Policy", + "Source Expression", + ] + ) for t in m.Tables: tableType = "Table" @@ -94,13 +117,21 @@ def list_tables(dataset: str, workspace: Optional[str] = None): if rPolicy: sourceExpression = t.RefreshPolicy.SourceExpression - new_data = {'Name': t.Name, 'Type': tableType, 'Hidden': t.IsHidden, 'Data Category': t.DataCategory, 'Description': t.Description, 'Refresh Policy': rPolicy, 'Source Expression': sourceExpression} + new_data = { + "Name": t.Name, + "Type": tableType, + "Hidden": t.IsHidden, + "Data Category": t.DataCategory, + "Description": t.Description, + "Refresh Policy": rPolicy, + "Source Expression": sourceExpression, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_annotations(dataset: str, workspace: Optional[str] = None): +def list_annotations(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's annotations and their properties. @@ -112,7 +143,7 @@ def list_annotations(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- + Returns ------- pandas.DataFrame @@ -126,108 +157,201 @@ def list_annotations(dataset: str, workspace: Optional[str] = None): tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model - df = pd.DataFrame(columns=['Object Name', 'Parent Object Name', 'Object Type', 'Annotation Name', 'Annotation Value']) + df = pd.DataFrame( + columns=[ + "Object Name", + "Parent Object Name", + "Object Type", + "Annotation Name", + "Annotation Value", + ] + ) mName = m.Name - for a in m.Annotations: - objectType = 'Model' + for a in m.Annotations: + objectType = "Model" aName = a.Name aValue = a.Value - new_data = {'Object Name': mName, 'Parent Object Name': "N/A", 'Object Type': objectType,'Annotation Name': aName, 'Annotation Value': aValue} + new_data = { + "Object Name": mName, + "Parent Object Name": "N/A", + "Object Type": objectType, + "Annotation Name": aName, + "Annotation Value": aValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for t in m.Tables: - objectType = 'Table' + objectType = "Table" tName = t.Name for ta in t.Annotations: taName = ta.Name taValue = ta.Value - new_data = {'Object Name': tName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': taName, 'Annotation Value': taValue} + new_data = { + "Object Name": tName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": taName, + "Annotation Value": taValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for p in t.Partitions: pName = p.Name - objectType = 'Partition' + objectType = "Partition" for pa in p.Annotations: - paName = paName - paValue = paValue - new_data = {'Object Name': pName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': paName, 'Annotation Value': paValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + paName = pa.Name + paValue = pa.Value + new_data = { + "Object Name": pName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": paName, + "Annotation Value": paValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for c in t.Columns: - objectType = 'Column' - cName = c.Name + objectType = "Column" + cName = c.Name for ca in c.Annotations: caName = ca.Name caValue = ca.Value - new_data = {'Object Name': cName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': caName, 'Annotation Value': caValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": cName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": caName, + "Annotation Value": caValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for ms in t.Measures: - objectType = 'Measure' + objectType = "Measure" measName = ms.Name for ma in ms.Annotations: maName = ma.Name maValue = ma.Value - new_data = {'Object Name': measName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': maName, 'Annotation Value': maValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": measName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": maName, + "Annotation Value": maValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for h in t.Hierarchies: - objectType = 'Hierarchy' + 
objectType = "Hierarchy" hName = h.Name for ha in h.Annotations: haName = ha.Name haValue = ha.Value - new_data = {'Object Name': hName, 'Parent Object Name': tName, 'Object Type': objectType,'Annotation Name': haName, 'Annotation Value': haValue} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Object Name": hName, + "Parent Object Name": tName, + "Object Type": objectType, + "Annotation Name": haName, + "Annotation Value": haValue, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) for d in m.DataSources: dName = d.Name - objectType = 'Data Source' + objectType = "Data Source" for da in d.Annotations: daName = da.Name daValue = da.Value - new_data = {'Object Name': dName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': daName, 'Annotation Value': daValue} + new_data = { + "Object Name": dName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": daName, + "Annotation Value": daValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for r in m.Relationships: rName = r.Name - objectType = 'Relationship' + objectType = "Relationship" for ra in r.Annotations: raName = ra.Name raValue = ra.Value - new_data = {'Object Name': rName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': raName, 'Annotation Value': raValue} + new_data = { + "Object Name": rName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": raName, + "Annotation Value": raValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for cul in m.Cultures: culName = cul.Name - objectType = 'Translation' + objectType = "Translation" for cula in cul.Annotations: culaName = cula.Name culaValue = cula.Value - new_data = {'Object Name': culName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': culaName, 'Annotation Value': culaValue} + new_data = { + "Object Name": culName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": culaName, + "Annotation Value": culaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for e in m.Expressions: eName = e.Name - objectType = 'Expression' + objectType = "Expression" for ea in e.Annotations: eaName = ea.Name eaValue = ea.Value - new_data = {'Object Name': eName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': eaName, 'Annotation Value': eaValue} + new_data = { + "Object Name": eName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": eaName, + "Annotation Value": eaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for per in m.Perspectives: perName = per.Name - objectType = 'Perspective' + objectType = "Perspective" for pera in per.Annotations: peraName = pera.Name peraValue = pera.Value - new_data = {'Object Name': perName, 'Parent Object Name': mName, 'Object Type': objectType,'Annotation Name': peraName, 'Annotation Value': peraValue} + new_data = { + "Object Name": perName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": peraName, + "Annotation Value": peraValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) for rol in m.Roles: rolName = rol.Name - objectType = 'Role' + objectType = "Role" for rola in rol.Annotations: rolaName = rola.Name rolaValue = rola.Value - new_data = {'Object Name': rolName, 'Parent Object 
Name': mName, 'Object Type': objectType,'Annotation Name': rolaName, 'Annotation Value': rolaValue} + new_data = { + "Object Name": rolName, + "Parent Object Name": mName, + "Object Type": objectType, + "Annotation Name": rolaName, + "Annotation Value": rolaValue, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def list_columns( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Shows a semantic model's columns and their properties. @@ -246,42 +370,60 @@ def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optio The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the semantic model's columns and their properties. """ + from sempy_labs.directlake._get_directlake_lakehouse import ( + get_direct_lake_lakehouse, + ) if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) - isDirectLake = any(r['Mode'] == 'DirectLake' for i, r in dfP.iterrows()) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) if isDirectLake: - dfC['Column Cardinality'] = None + dfC["Column Cardinality"] = None sql_statements = [] - lakeID, lakeName = get_direct_lake_lakehouse(dataset = dataset, workspace = workspace, lakehouse = lakehouse, lakehouse_workspace = lakehouse_workspace) - - for table_name in dfC['Table Name'].unique(): + (lakeID, lakeName) = get_direct_lake_lakehouse( + dataset=dataset, + workspace=workspace, + lakehouse=lakehouse, + lakehouse_workspace=lakehouse_workspace, + ) + + for table_name in dfC["Table Name"].unique(): print(f"Gathering stats for table: '{table_name}'...") - query = 'SELECT ' - - columns_in_table = dfC.loc[dfC['Table Name'] == table_name, 'Column Name'].unique() - + query = "SELECT " + + columns_in_table = dfC.loc[ + dfC["Table Name"] == table_name, "Column Name" + ].unique() + # Loop through columns within those tables for column_name in columns_in_table: - scName = dfC.loc[(dfC['Table Name'] == table_name) & (dfC['Column Name'] == column_name), 'Source'].iloc[0] - lakeTName = dfC.loc[(dfC['Table Name'] == table_name) & (dfC['Column Name'] == column_name), 'Query'].iloc[0] + scName = dfC.loc[ + (dfC["Table Name"] == table_name) + & (dfC["Column Name"] == column_name), + "Source", + ].iloc[0] + lakeTName = dfC.loc[ + (dfC["Table Name"] == table_name) + & (dfC["Column Name"] == column_name), + "Query", + ].iloc[0] # Build the query to be executed dynamically query = query + f"COUNT(DISTINCT({scName})) AS {scName}, " - + query = query[:-2] query = query + f" FROM {lakehouse}.{lakeTName}" sql_statements.append((table_name, query)) @@ -294,20 +436,20 @@ def list_columns(dataset: str, workspace: Optional[str] = None, lakehouse: Optio # Run the query df = spark.sql(query) - + for column in df.columns: x = df.collect()[0][column] for i, r in 
dfC.iterrows(): - if r['Table Name'] == tName and r['Source'] == column: - dfC.at[i, 'Column Cardinality'] = x + if r["Table Name"] == tName and r["Source"] == column: + dfC.at[i, "Column Cardinality"] = x # Remove column added temporarily - dfC.drop(columns=['Query'], inplace=True) + dfC.drop(columns=["Query"], inplace=True) return dfC -def list_dashboards(workspace: Optional[str] = None): +def list_dashboards(workspace: Optional[str] = None): """ Shows a list of the dashboards within a workspace. @@ -317,16 +459,27 @@ def list_dashboards(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the dashboards within a workspace. """ - df = pd.DataFrame(columns=['Dashboard ID', 'Dashboard Name', 'Read Only', 'Web URL', 'Embed URL', 'Data Classification', 'Users', 'Subscriptions']) - - if workspace == 'None': + df = pd.DataFrame( + columns=[ + "Dashboard ID", + "Dashboard Name", + "Read Only", + "Web URL", + "Embed URL", + "Data Classification", + "Users", + "Subscriptions", + ] + ) + + if workspace == "None": workspace_id = fabric.get_workspace_id() workspace = fabric.resovle_workspace_name(workspace_id) else: @@ -335,25 +488,34 @@ def list_dashboards(workspace: Optional[str] = None): client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/groups/{workspace_id}/dashboards") - for v in response.json()['value']: - dashboardID = v['id'] - displayName = v['displayName'] - isReadOnly = v['isReadOnly'] - webURL = v['webUrl'] - embedURL = v['embedUrl'] - dataClass = v['dataClassification'] - users = v['users'] - subs = v['subscriptions'] - - new_data = {'Dashboard ID': dashboardID, 'Dashboard Name': displayName, 'Read Only': isReadOnly, 'Web URL': webURL, 'Embed URL': embedURL, 'Data Classification': dataClass, 'Users': [users], 'Subscriptions': [subs]} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for v in response.json()["value"]: + dashboardID = v["id"] + displayName = v["displayName"] + isReadOnly = v["isReadOnly"] + webURL = v["webUrl"] + embedURL = v["embedUrl"] + dataClass = v["dataClassification"] + users = v["users"] + subs = v["subscriptions"] + + new_data = { + "Dashboard ID": dashboardID, + "Dashboard Name": displayName, + "Read Only": isReadOnly, + "Web URL": webURL, + "Embed URL": embedURL, + "Data Classification": dataClass, + "Users": [users], + "Subscriptions": [subs], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Read Only'] = df['Read Only'].astype(bool) + df["Read Only"] = df["Read Only"].astype(bool) return df -def list_lakehouses(workspace: Optional[str] = None): +def list_lakehouses(workspace: Optional[str] = None): """ Shows the lakehouses within a workspace. @@ -363,43 +525,59 @@ def list_lakehouses(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the lakehouses within a workspace. 
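A short usage sketch for `list_columns` on a Direct Lake model (the model name is hypothetical and the import assumes package-level export). For Direct Lake models the function also fills a 'Column Cardinality' column by running one COUNT(DISTINCT(...)) query per table against the lakehouse via Spark SQL.

```python
import sempy_labs as labs  # assumption: list_columns is exposed at package level

dfC = labs.list_columns(
    dataset="AdventureWorks",    # hypothetical Direct Lake semantic model
    workspace=None,
    lakehouse=None,              # None resolves to the attached lakehouse
    lakehouse_workspace=None,
)
dfC[["Table Name", "Column Name", "Source", "Column Cardinality"]].head()
```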
""" - df = pd.DataFrame(columns=['Lakehouse Name', 'Lakehouse ID', 'Description', 'OneLake Tables Path', 'OneLake Files Path', 'SQL Endpoint Connection String', 'SQL Endpoint ID', 'SQL Endpoint Provisioning Status']) - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + df = pd.DataFrame( + columns=[ + "Lakehouse Name", + "Lakehouse ID", + "Description", + "OneLake Tables Path", + "OneLake Files Path", + "SQL Endpoint Connection String", + "SQL Endpoint ID", + "SQL Endpoint Provisioning Status", + ] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/lakehouses/") - - for v in response.json()['value']: - lakehouseId = v['id'] - lakehouseName = v['displayName'] - lakehouseDesc = v['description'] - prop = v['properties'] - oneLakeTP = prop['oneLakeTablesPath'] - oneLakeFP = prop['oneLakeFilesPath'] - sqlEPProp = prop['sqlEndpointProperties'] - sqlEPCS = sqlEPProp['connectionString'] - sqlepid = sqlEPProp['id'] - sqlepstatus = sqlEPProp['provisioningStatus'] - - new_data = {'Lakehouse Name': lakehouseName, 'Lakehouse ID': lakehouseId, 'Description': lakehouseDesc, 'OneLake Tables Path': oneLakeTP, 'OneLake Files Path': oneLakeFP, 'SQL Endpoint Connection String': sqlEPCS, 'SQL Endpoint ID': sqlepid, 'SQL Endpoint Provisioning Status': sqlepstatus} + + for v in response.json()["value"]: + lakehouseId = v["id"] + lakehouseName = v["displayName"] + lakehouseDesc = v["description"] + prop = v["properties"] + oneLakeTP = prop["oneLakeTablesPath"] + oneLakeFP = prop["oneLakeFilesPath"] + sqlEPProp = prop["sqlEndpointProperties"] + sqlEPCS = sqlEPProp["connectionString"] + sqlepid = sqlEPProp["id"] + sqlepstatus = sqlEPProp["provisioningStatus"] + + new_data = { + "Lakehouse Name": lakehouseName, + "Lakehouse ID": lakehouseId, + "Description": lakehouseDesc, + "OneLake Tables Path": oneLakeTP, + "OneLake Files Path": oneLakeFP, + "SQL Endpoint Connection String": sqlEPCS, + "SQL Endpoint ID": sqlepid, + "SQL Endpoint Provisioning Status": sqlepstatus, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_warehouses(workspace: Optional[str] = None): +def list_warehouses(workspace: Optional[str] = None): """ Shows the warehouses within a workspace. @@ -409,40 +587,52 @@ def list_warehouses(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the warehouses within a workspace. 
""" - df = pd.DataFrame(columns=['Warehouse Name', 'Warehouse ID', 'Description', 'Connection Info', 'Created Date', 'Last Updated Time']) + df = pd.DataFrame( + columns=[ + "Warehouse Name", + "Warehouse ID", + "Description", + "Connection Info", + "Created Date", + "Last Updated Time", + ] + ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/warehouses/") - - for v in response.json()['value']: - warehouse_id = v['id'] - warehouse_name = v['displayName'] - desc = v['description'] - prop = v['properties'] - connInfo = prop['connectionInfo'] - createdDate = prop['createdDate'] - lastUpdate = prop['lastUpdatedTime'] - - new_data = {'Warehouse Name': warehouse_name, 'Warehouse ID': warehouse_id, 'Description': desc, 'Connection Info': connInfo, 'Created Date': createdDate, 'Last Updated Time': lastUpdate} + + for v in response.json()["value"]: + warehouse_id = v["id"] + warehouse_name = v["displayName"] + desc = v["description"] + prop = v["properties"] + connInfo = prop["connectionInfo"] + createdDate = prop["createdDate"] + lastUpdate = prop["lastUpdatedTime"] + + new_data = { + "Warehouse Name": warehouse_name, + "Warehouse ID": warehouse_id, + "Description": desc, + "Connection Info": connInfo, + "Created Date": createdDate, + "Last Updated Time": lastUpdate, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_sqlendpoints(workspace: Optional[str] = None): +def list_sqlendpoints(workspace: Optional[str] = None): """ Shows the SQL Endpoints within a workspace. @@ -452,36 +642,36 @@ def list_sqlendpoints(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the SQL Endpoints within a workspace. """ - df = pd.DataFrame(columns=['SQL Endpoint ID', 'SQL Endpoint Name', 'Description']) + df = pd.DataFrame(columns=["SQL Endpoint ID", "SQL Endpoint Name", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/sqlEndpoints/") - - for v in response.json()['value']: - sql_id = v['id'] - lake_name = v['displayName'] - desc = v['description'] - new_data = {'SQL Endpoint ID': sql_id, 'SQL Endpoint Name': lake_name, 'Description': desc} + for v in response.json()["value"]: + sql_id = v["id"] + lake_name = v["displayName"] + desc = v["description"] + + new_data = { + "SQL Endpoint ID": sql_id, + "SQL Endpoint Name": lake_name, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mirroredwarehouses(workspace: Optional[str] = None): +def list_mirroredwarehouses(workspace: Optional[str] = None): """ Shows the mirrored warehouses within a workspace. @@ -491,36 +681,38 @@ def list_mirroredwarehouses(workspace: Optional[str] = None): The Fabric workspace name. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the mirrored warehouses within a workspace. """ - df = pd.DataFrame(columns=['Mirrored Warehouse', 'Mirrored Warehouse ID', 'Description']) + df = pd.DataFrame( + columns=["Mirrored Warehouse", "Mirrored Warehouse ID", "Description"] + ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mirroredWarehouses/") - - for v in response.json()['value']: - mirr_id = v['id'] - dbname = v['displayName'] - desc = v['description'] - new_data = {'Mirrored Warehouse': dbname, 'Mirrored Warehouse ID': mirr_id, 'Description': desc} + for v in response.json()["value"]: + mirr_id = v["id"] + dbname = v["displayName"] + desc = v["description"] + + new_data = { + "Mirrored Warehouse": dbname, + "Mirrored Warehouse ID": mirr_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_kqldatabases(workspace: Optional[str] = None): +def list_kqldatabases(workspace: Optional[str] = None): """ Shows the KQL databases within a workspace. @@ -530,41 +722,55 @@ def list_kqldatabases(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the KQL Databases within a workspace. 
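The list_* wrappers in this file all share the same shape: resolve the workspace name and ID, issue a GET against the matching Fabric REST collection, and flatten `response.json()["value"]` into a dataframe. A condensed, illustrative sketch of that pattern follows; the `_list_items_generic` helper is not part of the library, and real item types expose different property sets.

```python
import pandas as pd
import sempy.fabric as fabric
from sempy_labs._helper_functions import resolve_workspace_name_and_id
from typing import Optional


def _list_items_generic(item_type: str, workspace: Optional[str] = None) -> pd.DataFrame:
    # Illustrative only: item_type is the REST collection name, e.g. "eventstreams".
    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

    client = fabric.FabricRestClient()
    response = client.get(f"/v1/workspaces/{workspace_id}/{item_type}/")

    rows = [
        {
            "Name": v["displayName"],
            "ID": v["id"],
            "Description": v["description"],
        }
        for v in response.json()["value"]
    ]
    return pd.DataFrame(rows)
```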
""" - df = pd.DataFrame(columns=['KQL Database Name', 'KQL Database ID', 'Description', 'Parent Eventhouse Item ID', 'Query Service URI', 'Ingestion Service URI', 'Kusto Database Type']) + df = pd.DataFrame( + columns=[ + "KQL Database Name", + "KQL Database ID", + "Description", + "Parent Eventhouse Item ID", + "Query Service URI", + "Ingestion Service URI", + "Kusto Database Type", + ] + ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlDatabases/") - - for v in response.json()['value']: - kql_id = v['id'] - kql_name = v['displayName'] - desc = v['description'] - prop = v['properties'] - eventId = prop['parentEventhouseItemId'] - qsURI = prop['queryServiceUri'] - isURI = prop['ingestionServiceUri'] - dbType = prop['kustoDatabaseType'] - - new_data = {'KQL Database Name': kql_name, 'KQL Database ID': kql_id, 'Description': desc, 'Parent Eventhouse Item ID': eventId, 'Query Service URI': qsURI, 'Ingestion Service URI': isURI, 'Kusto Database Type': dbType} + + for v in response.json()["value"]: + kql_id = v["id"] + kql_name = v["displayName"] + desc = v["description"] + prop = v["properties"] + eventId = prop["parentEventhouseItemId"] + qsURI = prop["queryServiceUri"] + isURI = prop["ingestionServiceUri"] + dbType = prop["kustoDatabaseType"] + + new_data = { + "KQL Database Name": kql_name, + "KQL Database ID": kql_id, + "Description": desc, + "Parent Eventhouse Item ID": eventId, + "Query Service URI": qsURI, + "Ingestion Service URI": isURI, + "Kusto Database Type": dbType, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_kqlquerysets(workspace: Optional[str] = None): +def list_kqlquerysets(workspace: Optional[str] = None): """ Shows the KQL Querysets within a workspace. @@ -574,36 +780,36 @@ def list_kqlquerysets(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the KQL Querysets within a workspace. 
""" - df = pd.DataFrame(columns=['KQL Queryset Name', 'KQL Queryset ID', 'Description']) + df = pd.DataFrame(columns=["KQL Queryset Name", "KQL Queryset ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/kqlQuerysets/") - - for v in response.json()['value']: - kql_id = v['id'] - kql_name = v['displayName'] - desc = v['description'] - new_data = {'KQL Queryset Name': kql_name, 'KQL Queryset ID': kql_id, 'Description': desc} + for v in response.json()["value"]: + kql_id = v["id"] + kql_name = v["displayName"] + desc = v["description"] + + new_data = { + "KQL Queryset Name": kql_name, + "KQL Queryset ID": kql_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mlmodels(workspace: Optional[str] = None): +def list_mlmodels(workspace: Optional[str] = None): """ Shows the ML models within a workspace. @@ -613,36 +819,36 @@ def list_mlmodels(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the ML models within a workspace. """ - df = pd.DataFrame(columns=['ML Model Name', 'ML Model ID', 'Description']) + df = pd.DataFrame(columns=["ML Model Name", "ML Model ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlModels/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'ML Model Name': modelName, 'ML Model ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "ML Model Name": modelName, + "ML Model ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_eventstreams(workspace: Optional[str] = None): +def list_eventstreams(workspace: Optional[str] = None): """ Shows the eventstreams within a workspace. @@ -652,36 +858,36 @@ def list_eventstreams(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the eventstreams within a workspace. 
""" - df = pd.DataFrame(columns=['Eventstream Name', 'Eventstream ID', 'Description']) + df = pd.DataFrame(columns=["Eventstream Name", "Eventstream ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/eventstreams/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Eventstream Name': modelName, 'Eventstream ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Eventstream Name": modelName, + "Eventstream ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_datapipelines(workspace: Optional[str] = None): +def list_datapipelines(workspace: Optional[str] = None): """ Shows the data pipelines within a workspace. @@ -691,36 +897,36 @@ def list_datapipelines(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the data pipelines within a workspace. """ - df = pd.DataFrame(columns=['Data Pipeline Name', 'Data Pipeline ID', 'Description']) + df = pd.DataFrame(columns=["Data Pipeline Name", "Data Pipeline ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/dataPipelines/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Data Pipeline Name': modelName, 'Data Pipeline ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Data Pipeline Name": modelName, + "Data Pipeline ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_mlexperiments(workspace: Optional[str] = None): +def list_mlexperiments(workspace: Optional[str] = None): """ Shows the ML experiments within a workspace. @@ -730,36 +936,36 @@ def list_mlexperiments(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the ML experiments within a workspace. 
""" - df = pd.DataFrame(columns=['ML Experiment Name', 'ML Experiment ID', 'Description']) + df = pd.DataFrame(columns=["ML Experiment Name", "ML Experiment ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/mlExperiments/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'ML Experiment Name': modelName, 'ML Experiment ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "ML Experiment Name": modelName, + "ML Experiment ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def list_datamarts(workspace: Optional[str] = None): +def list_datamarts(workspace: Optional[str] = None): """ Shows the datamarts within a workspace. @@ -769,36 +975,38 @@ def list_datamarts(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the datamarts within a workspace. """ - df = pd.DataFrame(columns=['Datamart Name', 'Datamart ID', 'Description']) + df = pd.DataFrame(columns=["Datamart Name", "Datamart ID", "Description"]) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/datamarts/") - - for v in response.json()['value']: - model_id = v['id'] - modelName = v['displayName'] - desc = v['description'] - new_data = {'Datamart Name': modelName, 'Datamart ID': model_id, 'Description': desc} + for v in response.json()["value"]: + model_id = v["id"] + modelName = v["displayName"] + desc = v["description"] + + new_data = { + "Datamart Name": modelName, + "Datamart ID": model_id, + "Description": desc, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def create_warehouse(warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None): +def create_warehouse( + warehouse: str, description: Optional[str] = None, workspace: Optional[str] = None +): """ Creates a Fabric warehouse. @@ -812,48 +1020,53 @@ def create_warehouse(warehouse: str, description: Optional[str] = None, workspac The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- + Returns ------- - + """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) if description == None: - request_body = { - "displayName": warehouse - } + request_body = {"displayName": warehouse} else: - request_body = { - "displayName": warehouse, - "description": description - } + request_body = {"displayName": warehouse, "description": description} client = fabric.FabricRestClient() - response = client.post(f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body) + response = client.post( + f"/v1/workspaces/{workspace_id}/warehouses/", json=request_body + ) if response.status_code == 201: - print(f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace.") + print( + f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + ) elif response.status_code == 202: - operationId = response.headers['x-ms-operation-id'] + operationId = response.headers["x-ms-operation-id"] response = client.get(f"/v1/operations/{operationId}") - response_body = json.loads(response.content) - while response_body['status'] != 'Succeeded': + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": time.sleep(3) response = client.get(f"/v1/operations/{operationId}") response_body = json.loads(response.content) response = client.get(f"/v1/operations/{operationId}/result") - print(f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace.") + print( + f"The '{warehouse}' warehouse has been created within the '{workspace}' workspace." + ) else: - print(f"ERROR: Failed to create the '{warehouse}' warehouse within the '{workspace}' workspace.") - -def update_item(item_type: str, current_name: str, new_name: str, description: Optional[str] = None, workspace:Optional[str] = None): - + print( + f"ERROR: Failed to create the '{warehouse}' warehouse within the '{workspace}' workspace." + ) + + +def update_item( + item_type: str, + current_name: str, + new_name: str, + description: Optional[str] = None, + workspace: Optional[str] = None, +): """ Updates the name/description of a Fabric item. @@ -871,70 +1084,69 @@ def update_item(item_type: str, current_name: str, new_name: str, description: O The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
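
    Example (illustrative names; assumes top-level export):

    >>> import sempy_labs as labs
    >>> # Rename a lakehouse and give it a description in one call
    >>> labs.update_item(
    ...     item_type="Lakehouse",
    ...     current_name="MyLakehouse",
    ...     new_name="SalesLakehouse",
    ...     description="Curated sales data",
    ... )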
-
-    Returns
-    -------
-
     """

-    if workspace == None:
-        workspace_id = fabric.get_workspace_id()
-        workspace = fabric.resolve_workspace_name(workspace_id)
-    else:
-        workspace_id = fabric.resolve_workspace_id(workspace)
+    (workspace, workspace_id) = resolve_workspace_name_and_id(workspace)

    itemTypes = {
-        'DataPipeline': 'dataPipelines',
-        'Eventstream': 'eventstreams',
-        'KQLDatabase': 'kqlDatabases',
-        'KQLQueryset': 'kqlQuerysets',
-        'Lakehouse': 'lakehouses',
-        'MLExperiment': 'mlExperiments',
-        'MLModel': 'mlModels',
-        'Notebook': 'notebooks',
-        'Warehouse': 'warehouses',
+        "DataPipeline": "dataPipelines",
+        "Eventstream": "eventstreams",
+        "KQLDatabase": "kqlDatabases",
+        "KQLQueryset": "kqlQuerysets",
+        "Lakehouse": "lakehouses",
+        "MLExperiment": "mlExperiments",
+        "MLModel": "mlModels",
+        "Notebook": "notebooks",
+        "Warehouse": "warehouses",
    }

-    item_type = item_type.replace(' ','').capitalize()
+    item_type = item_type.replace(" ", "").capitalize()

    if item_type not in itemTypes.keys():
        print(f"The '{item_type}' is not a valid item type. ")
        return
-
+
    itemType = itemTypes[item_type]

-    dfI = fabric.list_items(workspace = workspace, type = item_type)
-    dfI_filt = dfI[(dfI['Display Name'] == current_name)]
+    dfI = fabric.list_items(workspace=workspace, type=item_type)
+    dfI_filt = dfI[(dfI["Display Name"] == current_name)]

    if len(dfI_filt) == 0:
-        print(f"The '{current_name}' {item_type} does not exist within the '{workspace}' workspace.")
+        print(
+            f"The '{current_name}' {item_type} does not exist within the '{workspace}' workspace."
+        )
        return
-
-    itemId = dfI_filt['Id'].iloc[0]
+
+    itemId = dfI_filt["Id"].iloc[0]

    if description == None:
-        request_body = {
-            "displayName": new_name
-        }
+        request_body = {"displayName": new_name}
    else:
-        request_body = {
-            "displayName": new_name,
-            "description": description
-        }
+        request_body = {"displayName": new_name, "description": description}

    client = fabric.FabricRestClient()
-    response = client.patch(f"/v1/workspaces/{workspace_id}/{itemType}/{itemId}", json=request_body)
+    response = client.patch(
+        f"/v1/workspaces/{workspace_id}/{itemType}/{itemId}", json=request_body
+    )

    if response.status_code == 200:
        if description == None:
-            print(f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}'")
+            print(
+                f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}'"
+            )
        else:
-            print(f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}' and have a description of '{description}'")
+            print(
+                f"The '{current_name}' {item_type} within the '{workspace}' workspace has been updated to be named '{new_name}' and have a description of '{description}'"
+            )
    else:
-        print(f"ERROR: The '{current_name}' {item_type} within the '{workspace}' workspace was not updateds.")
+        print(
+            f"ERROR: The '{current_name}' {item_type} within the '{workspace}' workspace was not updated."
+        )

-def list_relationships(dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False):
+def list_relationships(
+    dataset: str, workspace: Optional[str] = None, extended: Optional[bool] = False
+):
    """
    Shows a semantic model's relationships and their properties.

@@ -948,7 +1160,7 @@ def list_relationships(dataset: str, workspace: Optional[str] = None, extended:
        or if no lakehouse attached, resolves to the workspace of the notebook.
    extended : bool, default=False
        Fetches extended column information.
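
    Example (dataset and workspace names are illustrative):

    >>> import sempy_labs as labs
    >>> # extended=True adds a 'Used Size' column derived from storage-engine DMVs
    >>> dfR = labs.list_relationships(
    ...     dataset="AdventureWorks", workspace="Sales Workspace", extended=True
    ... )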
- + Returns ------- pandas.DataFrame @@ -958,83 +1170,104 @@ def list_relationships(dataset: str, workspace: Optional[str] = None, extended: if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - - dfR = fabric.list_relationships(dataset = dataset, workspace = workspace) + + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) if extended: # Used to map the Relationship IDs rel = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [ID] AS [RelationshipID] ,[Name] FROM $SYSTEM.TMSCHEMA_RELATIONSHIPS - """) + """, + ) # USED_SIZE shows the Relationship Size where TABLE_ID starts with R$ cs = fabric.evaluate_dax( - dataset = dataset, workspace = workspace, dax_string = - """ + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TABLE_ID] ,[USED_SIZE] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMN_SEGMENTS - """) + """, + ) def parse_value(text): - ind = text.rfind('(') + 1 + ind = text.rfind("(") + 1 output = text[ind:] output = output[:-1] return output - cs['RelationshipID'] = cs['TABLE_ID'].apply(parse_value).astype('uint64') - relcs = pd.merge(cs[['RelationshipID', 'TABLE_ID', 'USED_SIZE']], rel, on='RelationshipID', how='left') + cs["RelationshipID"] = cs["TABLE_ID"].apply(parse_value).astype("uint64") + relcs = pd.merge( + cs[["RelationshipID", "TABLE_ID", "USED_SIZE"]], + rel, + on="RelationshipID", + how="left", + ) - dfR['Used Size'] = None + dfR["Used Size"] = None for i, r in dfR.iterrows(): - relName = r['Relationship Name'] + relName = r["Relationship Name"] + + filtered_cs = relcs[ + (relcs["Name"] == relName) & (relcs["TABLE_ID"].str.startswith("R$")) + ] + sumval = filtered_cs["USED_SIZE"].sum() + dfR.at[i, "Used Size"] = sumval - filtered_cs = relcs[(relcs['Name'] == relName) & (relcs['TABLE_ID'].str.startswith("R$"))] - sumval = filtered_cs['USED_SIZE'].sum() - dfR.at[i, 'Used Size'] = sumval + dfR["Used Size"] = dfR["Used Size"].astype("int") - dfR['Used Size'] = dfR['Used Size'].astype('int') - return dfR -def list_dataflow_storage_accounts(): +def list_dataflow_storage_accounts(): """ Shows the accessible dataflow storage accounts. Parameters ---------- - + Returns ------- pandas.DataFrame A pandas dataframe showing the accessible dataflow storage accounts. 
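
    Examples
    --------
    The function takes no parameters; a sketch assuming top-level export:

    >>> import sempy_labs as labs
    >>> df = labs.list_dataflow_storage_accounts()
    >>> # Keep only the storage accounts that are currently enabled
    >>> enabled = df[df["Enabled"]]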
""" - df = pd.DataFrame(columns=['Dataflow Storage Account ID', 'Dataflow Storage Account Name', 'Enabled']) + df = pd.DataFrame( + columns=[ + "Dataflow Storage Account ID", + "Dataflow Storage Account Name", + "Enabled", + ] + ) client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/dataflowStorageAccounts") - - for v in response.json()['value']: - dfsaId = v['id'] - dfsaName = v['name'] - isEnabled = v['isEnabled'] - - new_data = {'Dataflow Storage Account ID': dfsaId, 'Dataflow Storage Account Name': dfsaName, 'Enabled': isEnabled} + + for v in response.json()["value"]: + dfsaId = v["id"] + dfsaName = v["name"] + isEnabled = v["isEnabled"] + + new_data = { + "Dataflow Storage Account ID": dfsaId, + "Dataflow Storage Account Name": dfsaName, + "Enabled": isEnabled, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['Enabled'] = df['Enabled'].astype(bool) + df["Enabled"] = df["Enabled"].astype(bool) return df -def list_kpis(dataset: str, workspace: Optional[str] = None): +def list_kpis(dataset: str, workspace: Optional[str] = None): """ Shows a semantic model's KPIs and their properties. @@ -1053,22 +1286,52 @@ def list_kpis(dataset: str, workspace: Optional[str] = None): A pandas dataframe showing the KPIs for the semantic model. """ - from .TOM import connect_semantic_model - - with connect_semantic_model(dataset = dataset, workspace = workspace, readonly = True) as tom: - - df = pd.DataFrame(columns=['Table Name', 'Measure Name', 'Target Expression', 'Target Format String', 'Target Description', 'Status Expression', 'Status Graphic', 'Status Description', 'Trend Expression', 'Trend Graphic', 'Trend Description']) + from ._tom import connect_semantic_model + + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + + df = pd.DataFrame( + columns=[ + "Table Name", + "Measure Name", + "Target Expression", + "Target Format String", + "Target Description", + "Status Expression", + "Status Graphic", + "Status Description", + "Trend Expression", + "Trend Graphic", + "Trend Description", + ] + ) for t in tom.model.Tables: for m in t.Measures: if m.KPI is not None: - new_data = {'Table Name': t.Name, 'Measure Name': m.Name, 'Target Expression': m.KPI.TargetExpression, 'Target Format String': m.KPI.TargetFormatString, 'Target Description': m.KPI.TargetDescription, 'Status Graphic': m.KPI.StatusGraphic, 'Status Expression': m.KPI.StatusExpression, 'Status Description': m.KPI.StatusDescription, 'Trend Expression': m.KPI.TrendExpression, 'Trend Graphic': m.KPI.TrendGraphic, 'Trend Description': m.KPI.TrendDescription} - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + new_data = { + "Table Name": t.Name, + "Measure Name": m.Name, + "Target Expression": m.KPI.TargetExpression, + "Target Format String": m.KPI.TargetFormatString, + "Target Description": m.KPI.TargetDescription, + "Status Graphic": m.KPI.StatusGraphic, + "Status Expression": m.KPI.StatusExpression, + "Status Description": m.KPI.StatusDescription, + "Trend Expression": m.KPI.TrendExpression, + "Trend Graphic": m.KPI.TrendGraphic, + "Trend Description": m.KPI.TrendDescription, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df - -def list_workspace_role_assignments(workspace: Optional[str] = None): + +def list_workspace_role_assignments(workspace: Optional[str] = None): """ Shows the members of a given workspace. 
@@ -1078,31 +1341,32 @@ def list_workspace_role_assignments(workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - + Returns ------- pandas.DataFrame A pandas dataframe showing the members of a given workspace and their roles. """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - df = pd.DataFrame(columns=['User Name', 'User Email', 'Role Name', 'Type']) + df = pd.DataFrame(columns=["User Name", "User Email", "Role Name", "Type"]) client = fabric.FabricRestClient() response = client.get(f"/v1/workspaces/{workspace_id}/roleAssignments") - for i in response.json()['value']: - user_name = i['principal']['displayName'] - role_name = i['role'] - user_email = i['principal']['userDetails']['userPrincipalName'] - user_type = i['principal']['type'] - - new_data = {'User Name': user_name, 'Role Name': role_name, 'Type': user_type, 'User Email': user_email} + for i in response.json()["value"]: + user_name = i["principal"]["displayName"] + role_name = i["role"] + user_email = i["principal"]["userDetails"]["userPrincipalName"] + user_type = i["principal"]["type"] + + new_data = { + "User Name": user_name, + "Role Name": role_name, + "Type": user_type, + "User Email": user_email, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/src/sempy_labs/_model_auto_build.py b/src/sempy_labs/_model_auto_build.py new file mode 100644 index 00000000..0fa7070e --- /dev/null +++ b/src/sempy_labs/_model_auto_build.py @@ -0,0 +1,143 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._tom import connect_semantic_model +from sempy_labs._generate_semantic_model import create_blank_semantic_model +from sempy_labs.directlake._get_shared_expression import get_shared_expression +from typing import List, Optional, Union +from sempy._utils._log import log + + +@log +def model_auto_build( + dataset: str, + file_path: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Dynamically generates a semantic model based on an Excel file template. + + Parameters + ---------- + dataset : str + Name of the semantic model. + file_path : str + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
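+
+    Example (a sketch; the dataset name, Excel file path and lakehouse name are
+    assumptions, not values from this change):
+
+    >>> import sempy_labs as labs
+    >>> # Build a semantic model from an Excel template stored in the attached lakehouse
+    >>> labs.model_auto_build(
+    ...     dataset="Sales Model",
+    ...     file_path="/lakehouse/default/Files/model_template.xlsx",
+    ...     lakehouse="SalesLakehouse",
+    ... )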
+ + Returns + ------- + + """ + + if workspace is None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if lakehouse_workspace is None: + lakehouse_workspace = workspace + + sheets = [ + "Model", + "Tables", + "Measures", + "Columns", + "Roles", + "Hierarchies", + "Relationships", + ] + + create_blank_semantic_model(dataset=dataset, workspace=workspace) + + with connect_semantic_model(dataset=dataset, workspace=workspace) as tom: + + # DL Only + expr = get_shared_expression(lakehouse=lakehouse, workspace=lakehouse_workspace) + tom.add_expression(name="DatbaseQuery", expression=expr) + + for sheet in sheets: + df = pd.read_excel(file_path, sheet_name=sheet) + + if sheet == "Tables": + for i, r in df.iterrows(): + tName = r["Table Name"] + desc = r["Description"] + dc = r["Data Category"] + mode = r["Mode"] + hidden = bool(r["Hidden"]) + + tom.add_table( + name=tName, description=desc, data_category=dc, hidden=hidden + ) + if mode == "DirectLake": + tom.add_entity_partition(table_name=tName, entity_name=tName) + elif sheet == "Columns": + for i, r in df.iterrows(): + tName = r["Table Name"] + cName = r["Column Name"] + scName = r["Source Column"] + dataType = r["Data Type"] + hidden = bool(r["Hidden"]) + key = bool(r["Key"]) + if dataType == "Integer": + dataType = "Int64" + desc = r["Description"] + + tom.add_data_column( + table_name=tName, + column_name=cName, + source_column=scName, + data_type=dataType, + description=desc, + hidden=hidden, + key=key, + ) + elif sheet == "Measures": + for i, r in df.iterrows(): + tName = r["Table Name"] + mName = r["Measure Name"] + expr = r["Expression"] + desc = r["Description"] + format = r["Format String"] + hidden = bool(r["Hidden"]) + + tom.add_measure( + table_name=tName, + measure_name=mName, + expression=expr, + format_string=format, + description=desc, + hidden=hidden, + ) + elif sheet == "Relationships": + for i, r in df.iterrows(): + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + fromCard = r["From Cardinality"] + toCard = r["To Cardinality"] + + tom.add_relationship( + from_table=fromTable, + from_column=fromColumn, + to_table=toTable, + to_column=toColumn, + from_cardinality=fromCard, + to_cardinality=toCard, + ) + elif sheet == "Roles": + print("hi") + elif sheet == "Hierarchies": + print("hi") diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py new file mode 100644 index 00000000..dfd3a61e --- /dev/null +++ b/src/sempy_labs/_model_bpa.py @@ -0,0 +1,1354 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import re, unicodedata, warnings, datetime +import numpy as np +from IPython.display import display, HTML +from pyspark.sql import SparkSession +from sempy_labs._model_dependencies import get_measure_dependencies +from sempy_labs._helper_functions import format_dax_object_name, resolve_lakehouse_name +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from typing import List, Optional, Union +from sempy._utils._log import log + + +def model_bpa_rules(): + """ + Shows the default rules for the semantic model BPA used by the run_model_bpa function. + + Parameters + ---------- + + + Returns + ------- + pandas.DataFrame + A pandas dataframe containing the default rules for the run_model_bpa function. 
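+
+    Examples
+    --------
+    A sketch showing how the rules can feed run_model_bpa; the dataset name is
+    illustrative and top-level export is assumed:
+
+    >>> import sempy_labs as labs
+    >>> rules = labs.model_bpa_rules()
+    >>> # Scan a model using only the 'Performance' category of rules
+    >>> perf_rules = rules[rules["Category"] == "Performance"]
+    >>> labs.run_model_bpa(dataset="AdventureWorks", rules_dataframe=perf_rules)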
+ """ + + df_rules = pd.DataFrame( + [ + ( + "Performance", + "Column", + "Warning", + "Do not use floating point data types", + lambda df: df["Data Type"] == "Double", + 'The "Double" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. Use "Int64" or "Decimal" where appropriate (but note that "Decimal" is limited to 4 digits after the decimal sign).', + ), + ( + "Performance", + "Column", + "Warning", + "Avoid using calculated columns", + lambda df: df["Type"] == "Calculated", + "Calculated columns do not compress as well as data columns so they take up more memory. They also slow down processing times for both the table as well as process recalc. Offload calculated column logic to your data warehouse and turn these calculated columns into data columns.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "Performance", + "Relationship", + "Warning", + "Check if bi-directional and many-to-many relationships are valid", + lambda df: (df["Multiplicity"] == "m:m") + | (df["Cross Filtering Behavior"] == "BothDirections"), + "Bi-directional and many-to-many relationships may cause performance degradation or even have unintended consequences. Make sure to check these specific relationships to ensure they are working as designed and are actually necessary.", + "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax", + ), + ( + "Performance", + "Row Level Security", + "Info", + "Check if dynamic row level security (RLS) is necessary", + lambda df: df["Is Dynamic"], + "Usage of dynamic row level security (RLS) can add memory and performance overhead. Please research the pros/cons of using it.", + "https://docs.microsoft.com/power-bi/admin/service-admin-rls", + ), + ( + "Performance", + "Table", + "Warning", + "Avoid using many-to-many relationships on tables used for dynamic row level security", + lambda df: (df["Used in M2M Relationship"] == True) + & (df["Used in Dynamic RLS"] == True), + "Using many-to-many relationships on tables which use dynamic row level security can cause serious query performance degradation. This pattern's performance problems compound when snowflaking multiple many-to-many relationships against a table which contains row level security. Instead, use one of the patterns shown in the article below where a single dimension table relates many-to-one to a security table.", + "https://www.elegantbi.com/post/dynamicrlspatterns", + ), + ( + "Performance", + "Relationship", + "Warning", + "Many-to-many relationships should be single-direction", + lambda df: (df["Multiplicity"] == "m:m") + & (df["Cross Filtering Behavior"] == "BothDirections"), + ), + ( + "Performance", + "Column", + "Warning", + "Set IsAvailableInMdx to false on non-attribute columns", + lambda df: (df["Is Direct Lake"] == False) + & (df["Is Available in MDX"] == True) + & ((df["Hidden"] == True) | (df["Parent Is Hidden"] == True)) + & (df["Used in Sort By"] == False) + & (df["Used in Hierarchy"] == False) + & (df["Sort By Column"] == None), + "To speed up processing time and conserve memory after processing, attribute hierarchies should not be built for columns that are never used for slicing by MDX clients. In other words, all hidden columns that are not used as a Sort By Column or referenced in user hierarchies should have their IsAvailableInMdx property set to false. 
The IsAvailableInMdx property is not relevant for Direct Lake models.", + "https://blog.crossjoin.co.uk/2018/07/02/isavailableinmdx-ssas-tabular", + ), + # ('Performance', 'Partition', 'Warning', "Set 'Data Coverage Definition' property on the DirectQuery partition of a hybrid table", + # lambda df: (df['Data Coverage Definition Expression'].isnull()) & (df['Mode'] == 'DirectQuery') & (df['Import Partitions'] > 0) & (df['Has Date Table']), + # "Setting the 'Data Coverage Definition' property may lead to better performance because the engine knows when it can only query the import-portion of the table and when it needs to query the DirectQuery portion of the table.", + # "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions", + # ), + ( + "Performance", + "Table", + "Warning", + "Set dimensions tables to dual mode instead of import when using DirectQuery on fact tables", + lambda df: (df["Import Partitions"] == 1) + & (df["Model Has DQ"]) + & (df["Used in Relationship x:1"]), + "https://learn.microsoft.com/power-bi/transform-model/desktop-storage-mode#propagation-of-the-dual-setting", + ), + ( + "Performance", + "Partition", + "Warning", + "Minimize Power Query transformations", + lambda df: (df["Source Type"] == "M") + & ( + ('Table.Combine("' in df["Query"]) + | ('Table.Join("' in df["Query"]) + | ('Table.NestedJoin("' in df["Query"]) + | ('Table.AddColumn("' in df["Query"]) + | ('Table.Group("' in df["Query"]) + | ('Table.Sort("' in df["Query"]) + | ('Table.Sort("' in df["Query"]) + | ('Table.Pivot("' in df["Query"]) + | ('Table.Unpivot("' in df["Query"]) + | ('Table.UnpivotOtherColumns("' in df["Query"]) + | ('Table.Distinct("' in df["Query"]) + | ('[Query=(""SELECT' in df["Query"]) + | ("Value.NativeQuery" in df["Query"]) + | ("OleDb.Query" in df["Query"]) + | ("Odbc.Query" in df["Query"]) + ), + "Minimize Power Query transformations in order to improve model processing performance. It is a best practice to offload these transformations to the data warehouse if possible. Also, please check whether query folding is occurring within your model. Please reference the article below for more information on query folding.", + "https://docs.microsoft.com/power-query/power-query-folding", + ), + ( + "Performance", + "Table", + "Warning", + "Consider a star-schema instead of a snowflake architecture", + lambda df: (df["Type"] != "Calculation Group") + & df["Used in Relationship Both Sides"], + "Generally speaking, a star-schema is the optimal architecture for tabular models. That being the case, there are valid cases to use a snowflake approach. Please check your model and consider moving to a star-schema architecture.", + "https://docs.microsoft.com/power-bi/guidance/star-schema", + ), + ( + "Performance", + "Table", + "Warning", + "Reduce usage of calculated tables", + lambda df: df["Type"] == "Calculated Table", + "Migrate calculated table logic to your data warehouse. Reliance on calculated tables will lead to technical debt and potential misalignments if you have multiple models on your platform.", + ), + ( + "Performance", + "Column", + "Warning", + "Reduce usage of calculated columns that use the RELATED function", + lambda df: (df["Type"] == "Calculated") + & (df["Source"].str.contains(r"related\s*\(", case=False)), + "Calculated columns do not compress as well as data columns and may cause longer processing times. As such, calculated columns should be avoided if possible. 
One scenario where they may be easier to avoid is if they use the RELATED function.", + "https://www.sqlbi.com/articles/storage-differences-between-calculated-columns-and-calculated-tables", + ), + ( + "Performance", + "Model", + "Warning", + "Avoid excessive bi-directional or many-to-many relationships", + lambda df: ( + df["M2M or BiDi Relationship Count"] / df["Relationship Count"] + ) + > 0.3, + "Limit use of b-di and many-to-many relationships. This rule flags the model if more than 30% of relationships are bi-di or many-to-many.", + "https://www.sqlbi.com/articles/bidirectional-relationships-and-ambiguity-in-dax", + ), + ( + "Performance", + "Column", + "Warning", + "Avoid bi-directional or many-to-many relationships against high-cardinality columns", + lambda df: df["Used in M2M/BiDi Relationship"] + & df["Column Cardinality"] + > 100000, + "For best performance, it is recommended to avoid using bi-directional relationships against high-cardinality columns", + ), + ( + "Performance", + "Table", + "Warning", + "Remove auto-date table", + lambda df: (df["Type"] == "Calculated Table") + & ( + (df["Name"].str.startswith("DateTableTemplate_")) + | (df["Name"].str.startswith("LocalDateTable_")) + ), + "Avoid using auto-date tables. Make sure to turn off auto-date table in the settings in Power BI Desktop. This will save memory resources.", + "https://www.youtube.com/watch?v=xu3uDEHtCrg", + ), + ( + "Performance", + "Table", + "Warning", + "Date/calendar tables should be marked as a date table", + lambda df: ( + (df["Name"].str.contains(r"date", case=False)) + | (df["Name"].str.contains(r"calendar", case=False)) + ) + & (df["Data Category"] != "Time"), + "This rule looks for tables that contain the words 'date' or 'calendar' as they should likely be marked as a date table.", + "https://docs.microsoft.com/power-bi/transform-model/desktop-date-tables", + ), + ( + "Performance", + "Table", + "Warning", + "Large tables should be partitioned", + lambda df: (df["Is Direct Lake"] == False) + & (df["Partition Count"] == 1) + & (df["Row Count"] > 25000000), + "Large tables should be partitioned in order to optimize processing. This is not relevant for semantic models in Direct Lake mode as they can only have one partition per table.", + ), + ( + "Performance", + "Row Level Security", + "Warning", + "Limit row level security (RLS) logic", + lambda df: df["Filter Expression"].str.contains( + "|".join(["right", "left", "filter", "upper", "lower", "find"]), + case=False, + ), + "Try to simplify the DAX used for row level security. Usage of the functions within this rule can likely be offloaded to the upstream systems (data warehouse).", + ), + ( + "Performance", + "Model", + "Warning", + "Model should have a date table", + lambda df: df["Has Date Table"], + "Generally speaking, models should generally have a date table. Models that do not have a date table generally are not taking advantage of features such as time intelligence or may not have a properly structured architecture.", + ), + ( + "Performance", + "Measure", + "Warning", + "Measures using time intelligence and model is using Direct Query", + lambda df: df["DQ Date Function Used"], + "At present, time intelligence functions are known to not perform as well when using Direct Query. 
If you are having performance issues, you may want to try alternative solutions such as adding columns in the fact table that show previous year or previous month data.", + ), + ( + "Error Prevention", + "Calculation Item", + "Error", + "Calculation items must have an expression", + lambda df: df["Expression"].str.len() == 0, + "Calculation items must have an expression. Without an expression, they will not show any values.", + ), + ( + "Error Prevention", + ["Table", "Column", "Measure", "Hierarchy", "Partition"], + "Error", + "Avoid invalid characters in names", + lambda df: df["Name"].apply( + lambda x: any( + unicodedata.category(char) == "Cc" and not char.isspace() + for char in x + ) + ), + "This rule identifies if a name for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.", + ), + ( + "Error Prevention", + ["Table", "Column", "Measure", "Hierarchy"], + "Error", + "Avoid invalid characters in descriptions", + lambda df: df["Description"].apply( + lambda x: any( + unicodedata.category(char) == "Cc" and not char.isspace() + for char in x + ) + ), + "This rule identifies if a description for a given object in your model (i.e. table/column/measure) which contains an invalid character. Invalid characters will cause an error when deploying the model (and failure to deploy). This rule has a fix expression which converts the invalid character into a space, resolving the issue.", + ), + ( + "Error Prevention", + "Relationship", + "Warning", + "Relationship columns should be of the same data type", + lambda df: df["From Column Data Type"] != df["To Column Data Type"], + "Columns used in a relationship should be of the same data type. Ideally, they will be of integer data type (see the related rule '[Formatting] Relationship columns should be of integer data type'). Having columns within a relationship which are of different data types may lead to various issues.", + ), + ( + "Error Prevention", + "Column", + "Error", + "Data columns must have a source column", + lambda df: (df["Type"] == "Data") & (df["Source"].str.len() == 0), + "Data columns must have a source column. A data column without a source column will cause an error when processing the model.", + ), + ( + "Error Prevention", + "Column", + "Warning", + "Set IsAvailableInMdx to true on necessary columns", + lambda df: (df["Is Direct Lake"] == False) + & (df["Is Available in MDX"] == False) + & ( + (df["Used in Sort By"] == True) + | (df["Used in Hierarchy"] == True) + | (df["Sort By Column"] != None) + ), + "In order to avoid errors, ensure that attribute hierarchies are enabled if a column is used for sorting another column, used in a hierarchy, used in variations, or is sorted by another column. The IsAvailableInMdx property is not relevant for Direct Lake models.", + ), + ( + "Error Prevention", + "Table", + "Error", + "Avoid the USERELATIONSHIP function and RLS against the same table", + lambda df: (df["USERELATIONSHIP Used"] == True) + & (df["Used in RLS"] == True), + "The USERELATIONSHIP function may not be used against a table which also leverages row-level security (RLS). This will generate an error when using the particular measure in a visual. 
This rule will highlight the table which is used in a measure's USERELATIONSHIP function as well as RLS.",
+            "https://blog.crossjoin.co.uk/2013/05/10/userelationship-and-tabular-row-security",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "Avoid using the IFERROR function",
+            lambda df: df["Measure Expression"].str.contains(
+                r"iferror\s*\(", case=False
+            ),
+            "Avoid using the IFERROR function as it may cause performance degradation. If you are concerned about a divide-by-zero error, use the DIVIDE function as it naturally resolves such errors as blank (or you can customize what should be shown in case of such an error).",
+            "https://www.elegantbi.com/post/top10bestpractices",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "Use the TREATAS function instead of INTERSECT for virtual relationships",
+            lambda df: df["Measure Expression"].str.contains(
+                r"intersect\s*\(", case=False
+            ),
+            "The TREATAS function is more efficient and provides better performance than the INTERSECT function when used in virtual relationships.",
+            "https://www.sqlbi.com/articles/propagate-filters-using-treatas-in-dax",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "The EVALUATEANDLOG function should not be used in production models",
+            lambda df: df["Measure Expression"].str.contains(
+                r"evaluateandlog\s*\(", case=False
+            ),
+            "The EVALUATEANDLOG function is meant to be used only in development/test environments and should not be used in production models.",
+            "https://pbidax.wordpress.com/2022/08/16/introduce-the-dax-evaluateandlog-function",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "Measures should not be direct references of other measures",
+            lambda df: df["Measure Expression"]
+            .str.strip()
+            .isin(df["Measure Object"]),
+            "This rule identifies measures which are simply a reference to another measure. As an example, consider a model with two measures: [MeasureA] and [MeasureB]. This rule would be triggered for MeasureB if MeasureB's DAX was MeasureB:=[MeasureA]. Such duplicative measures should be removed.",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "No two measures should have the same definition",
+            lambda df: df["Measure Expression"]
+            .apply(lambda x: re.sub(r"\s+", "", x))
+            .duplicated(keep=False),
+            "Two measures with different names and defined by the same DAX expression should be avoided to reduce redundancy.",
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "Avoid addition or subtraction of constant values to results of divisions",
+            lambda df: df["Measure Expression"].str.contains(
+                "(?i)DIVIDE\\s*\\((\\s*.*?)\\)\\s*[+-]\\s*1"
+                or "\\/\\s*.*(?=[-+]\\s*1)",
+                regex=True,
+            ),
+        ),
+        (
+            "DAX Expressions",
+            "Measure",
+            "Warning",
+            "Avoid using '1-(x/y)' syntax",
+            lambda df: df["Measure Expression"].str.contains(
+                "[0-9]+\\s*[-+]\\s*[\\(]*\\s*(?i)SUM\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*\\[[A-Za-z0-9 _]+\\]\\s*\\)\\s*\\/"
+                or "[0-9]+\\s*[-+]\\s*(?i)DIVIDE\\s*\\(",
+                regex=True,
+            ),
+            "Instead of using the '1-(x/y)' or '1+(x/y)' syntax to achieve a percentage calculation, use the basic DAX functions (as shown below). Using the improved syntax will generally improve the performance. The '1+/-...' syntax always returns a value whereas the solution without the '1+/-...' does not (as the value may be 'blank'). Therefore the '1+/-...' syntax may return more rows/columns which may result in a slower query speed.
Let's clarify with an example: Avoid this: 1 - SUM ( 'Sales'[CostAmount] ) / SUM( 'Sales'[SalesAmount] ) Better: DIVIDE ( SUM ( 'Sales'[SalesAmount] ) - SUM ( 'Sales'[CostAmount] ), SUM ( 'Sales'[SalesAmount] ) ) Best: VAR x = SUM ( 'Sales'[SalesAmount] ) RETURN DIVIDE ( x - SUM ( 'Sales'[CostAmount] ), x )", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Filter measure values by columns, not tables", + lambda df: df["Measure Expression"].str.contains( + "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[[^\\]]+\\]" + or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*\\[", + regex=True, + ), + "Instead of using this pattern FILTER('Table',[Measure]>Value) for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below (if possible). Filtering on a specific column will produce a smaller table for the engine to process, thereby enabling faster performance. Using the VALUES function or the ALL function depends on the desired measure result.\nOption 1: FILTER(VALUES('Table'[Column]),[Measure] > Value)\nOption 2: FILTER(ALL('Table'[Column]),[Measure] > Value)", + "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Filter column values with proper syntax", + lambda df: df["Measure Expression"].str.contains( + "(?i)CALCULATE\\s*\\(\\s*[^,]+,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]" + or "(?i)CALCULATETABLE\\s*\\([^,]*,\\s*(?i)FILTER\\s*\\(\\s*'*[A-Za-z0-9 _]+'*\\s*,\\s*'*[A-Za-z0-9 _]+'*\\[[A-Za-z0-9 _]+\\]", + regex=True, + ), + "Instead of using this pattern FILTER('Table','Table'[Column]=\"Value\") for the filter parameters of a CALCULATE or CALCULATETABLE function, use one of the options below. As far as whether to use the KEEPFILTERS function, see the second reference link below.\nOption 1: KEEPFILTERS('Table'[Column]=\"Value\")\nOption 2: 'Table'[Column]=\"Value\"", + "https://docs.microsoft.com/power-bi/guidance/dax-avoid-avoid-filter-as-filter-argument Reference: https://www.sqlbi.com/articles/using-keepfilters-in-dax", + ), + ( + "DAX Expressions", + "Measure", + "Warning", + "Use the DIVIDE function for division", + lambda df: df["Measure Expression"].str.contains( + '\\]\\s*\\/(?!\\/)(?!\\*)" or "\\)\\s*\\/(?!\\/)(?!\\*)', regex=True + ), + 'Use the DIVIDE function instead of using "/". The DIVIDE function resolves divide-by-zero cases. As such, it is recommended to use to avoid errors.', + "https://docs.microsoft.com/power-bi/guidance/dax-divide-function-operator", + ), + ( + "DAX Expressions", + "Measure", + "Error", + "Column references should be fully qualified", + lambda df: df["Has Unqualified Column Reference"], + "Using fully qualified column references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a column in DAX, first specify the table name, then specify the column name in square brackets.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "DAX Expressions", + "Measure", + "Error", + "Measure references should be unqualified", + lambda df: df["Has Fully Qualified Measure Reference"], + "Using unqualified measure references makes it easier to distinguish between column and measure references, and also helps avoid certain errors. When referencing a measure using DAX, do not specify the table name. 
Use only the measure name in square brackets.", + "https://www.elegantbi.com/post/top10bestpractices", + ), + ( + "DAX Expressions", + "Relationship", + "Warning", + "Inactive relationships that are never activated", + lambda df: df["Inactive without USERELATIONSHIP"], + "Inactive relationships are activated using the USERELATIONSHIP function. If an inactive relationship is not referenced in any measure via this function, the relationship will not be used. It should be determined whether the relationship is not necessary or to activate the relationship via this method.", + "https://dax.guide/userelationship", + ), + ( + "Maintenance", + "Column", + "Warning", + "Remove unnecessary columns", + lambda df: (df["Hidden"] | df["Parent Is Hidden"]) + & ~df["Used in Relationship"] + & ~df["Used in Sort By"] + & ~df["Used in Hierarchy"] + & (df["Referenced By"] == 0) + & ~(df["Used in RLS"]), # usedInOLS + "Hidden columns that are not referenced by any DAX expressions, relationships, hierarchy levels or Sort By-properties should be removed.", + ), + ( + "Maintenance", + "Measure", + "Warning", + "Remove unnecessary measures", + lambda df: df["Measure Hidden"] & (df["Referenced By"] == 0), + "Hidden measures that are not referenced by any DAX expressions should be removed for maintainability.", + ), + # ('Maintenance', 'Role', 'Warning', 'Remove roles with no members', + # lambda df: df['Member Count'] == 0, + # ), + ( + "Maintenance", + "Table", + "Warning", + "Ensure tables have relationships", + lambda df: (df["Used in Relationship"] == False) + & (df["Type"] != "Calculation Group"), + "This rule highlights tables which are not connected to any other table in the model with a relationship.", + ), + ( + "Maintenance", + "Table", + "Warning", + "Calculation groups with no calculation items", + lambda df: (df["Type"] == "Calculation Group") + & (df["Has Calculation Items"]), + ), + ( + "Maintenance", + "Column", + "Info", + "Visible objects with no description", + lambda df: (df["Hidden"] == False) & (df["Description"].str.len() == 0), + "Calculation groups have no function unless they have calculation items.", + ), + ( + "Formatting", + "Column", + "Warning", + "Provide format string for 'Date' columns", + lambda df: (df["Column Name"].str.contains(r"date", case=False)) + & (df["Data Type"] == "DateTime") + & (df["Format String"] != "mm/dd/yyyy"), + 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".', + ), + ( + "Formatting", + "Column", + "Warning", + "Do not summarize numeric columns", + lambda df: ( + (df["Data Type"] == "Int64") + | (df["Data Type"] == "Decimal") + | (df["Data Type"] == "Double") + ) + & (df["Summarize By"] != "None") + & ~((df["Hidden"]) | (df["Parent Is Hidden"])), + 'Numeric columns (integer, decimal, double) should have their SummarizeBy property set to "None" to avoid accidental summation in Power BI (create measures instead).', + ), + ( + "Formatting", + "Measure", + "Info", + "Provide format string for measures", + lambda df: ~((df["Measure Hidden"]) | (df["Parent Is Hidden"])) + & (df["Format String"].str.len() == 0), + "Visible measures should have their format string property assigned.", + ), + ( + "Formatting", + "Column", + "Info", + "Add data category for columns", + lambda df: (df["Data Category"] == "") + & ( + ( + ( + (df["Column Name"].str.contains(r"country", case=False)) + | (df["Column Name"].str.contains(r"city", case=False)) + | (df["Column Name"].str.contains(r"continent", case=False)) + ) + & (df["Data 
Type"] == "String") + ) + | ( + ( + (df["Column Name"].str.contains(r"latitude", case=False)) + | (df["Column Name"].str.contains(r"longitude", case=False)) + ) + & (df["Data Type"] == "String") + ) + ), + "Add Data Category property for appropriate columns.", + "https://docs.microsoft.com/power-bi/transform-model/desktop-data-categorization", + ), + ( + "Formatting", + "Measure", + "Warning", + "Percentages should be formatted with thousands separators and 1 decimal", + lambda df: (df["Format String"].str.contains("%")) + & (df["Format String"] != "#,0.0%;-#,0.0%;#,0.0%"), + ), + ( + "Formatting", + "Measure", + "Warning", + "Whole numbers should be formatted with thousands separators and no decimals", + lambda df: (~df["Format String"].str.contains("$")) + & ~(df["Format String"].str.contains("%")) + & ~((df["Format String"] == "#,0") | (df["Format String"] == "#,0.0")), + ), + ( + "Formatting", + "Column", + "Info", + "Hide foreign keys", + lambda df: (df["Foreign Key"]) & (df["Hidden"] == False), + "Foreign keys should always be hidden.", + ), + ( + "Formatting", + "Column", + "Info", + "Mark primary keys", + lambda df: (df["Primary Key"]) & (df["Key"] == False), + "Set the 'Key' property to 'True' for primary key columns within the column properties.", + ), + ( + "Formatting", + "Column", + "Info", + "Month (as a string) must be sorted", + lambda df: (df["Column Name"].str.contains(r"month", case=False)) + & ~(df["Column Name"].str.contains(r"months", case=False)) + & (df["Data Type"] == "String") + & (df["Sort By Column"] == ""), + "This rule highlights month columns which are strings and are not sorted. If left unsorted, they will sort alphabetically (i.e. April, August...). Make sure to sort such columns so that they sort properly (January, February, March...).", + ), + ( + "Formatting", + "Relationship", + "Warning", + "Relationship columns should be of integer data type", + lambda df: (df["From Column Data Type"] != "Int64") + | (df["To Column Data Type"] != "Int64"), + "It is a best practice for relationship columns to be of integer data type. This applies not only to data warehousing but data modeling as well.", + ), + ( + "Formatting", + "Column", + "Warning", + 'Provide format string for "Month" columns', + lambda df: (df["Column Name"].str.contains(r"month", case=False)) + & (df["Data Type"] == "DateTime") + & (df["Format String"] != "MMMM yyyy"), + 'Columns of type "DateTime" that have "Month" in their names should be formatted as "MMMM yyyy".', + ), + ( + "Formatting", + "Column", + "Info", + "Format flag columns as Yes/No value strings", + lambda df: ( + df["Column Name"].str.startswith("Is") + & (df["Data Type"] == "Int64") + & ~(df["Hidden"] | df["Parent Is Hidden"]) + ) + | ( + df["Column Name"].str.endswith(" Flag") + & (df["Data Type"] != "String") + & ~(df["Hidden"] | df["Parent Is Hidden"]) + ), + "Flags must be properly formatted as Yes/No as this is easier to read than using 0/1 integer values.", + ), + # ('Formatting', ['Table', 'Column', 'Measure', 'Partition', 'Hierarchy'], 'Error', 'Objects should not start or end with a space', + # lambda df: (df['Name'].str[0] == ' ') | (df['Name'].str[-1] == ' '), + # 'Objects should not start or end with a space. 
This usually happens by accident and is difficult to find.', + # ), + ( + "Formatting", + ["Table", "Column", "Measure", "Partition", "Hierarchy"], + "Info", + "First letter of objects must be capitalized", + lambda df: df["Name"].str[0].str.upper() != df["Name"].str[0], + "The first letter of object names should be capitalized to maintain professional quality.", + ), + ( + "Naming Conventions", + ["Table", "Column", "Measure", "Partition", "Hierarchy"], + "Warning", + "Object names must not contain special characters", + lambda df: df["Name"].str.contains(r"[\t\r\n]"), + "Object names should not include tabs, line breaks, etc.", + ), # , + # ('Error Prevention', ['Table'], 'Error', 'Avoid invalid characters in names', + # lambda df: df['Name'].str.char.iscontrol() & ~ df['Name'].str.char.isspace(), + # )#, + ], + columns=[ + "Category", + "Scope", + "Severity", + "Rule Name", + "Expression", + "Description", + "URL", + ], + ) + + df_rules["Severity"] = ( + df_rules["Severity"] + .replace("Warning", "⚠️") + .replace("Error", "\u274C") + .replace("Info", "ℹ️") + ) + + pd.set_option("display.max_colwidth", 1000) + + return df_rules + + +@log +def run_model_bpa( + dataset: str, + rules_dataframe: Optional[pd.DataFrame] = None, + workspace: Optional[str] = None, + export: Optional[bool] = False, + return_dataframe: Optional[bool] = False, + **kwargs, +): + """ + Displays an HTML visualization of the results of the Best Practice Analyzer scan for a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + rules_dataframe : pandas.DataFrame, default=None + A pandas dataframe containing rules to be evaluated. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + export : bool, default=False + If True, exports the resulting dataframe to a delta table in the lakehouse attached to the notebook. + return_dataframe : bool, default=False + If True, returns a pandas dataframe instead of the visualization. + + Returns + ------- + pandas.DataFrame + A pandas dataframe in HTML format showing semantic model objects which violated the best practice analyzer rules. + """ + + if "extend" in kwargs: + print( + "The 'extend' parameter has been deprecated. Please remove this parameter from the function going forward." 
+ ) + del kwargs["extend"] + + warnings.filterwarnings( + "ignore", + message="This pattern is interpreted as a regular expression, and has match groups.", + ) + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if rules_dataframe is None: + rules_dataframe = model_bpa_rules() + + dfT = fabric.list_tables(dataset=dataset, workspace=workspace, extended=True) + dfT = dfT.drop_duplicates() + dfC = fabric.list_columns( + dataset=dataset, + workspace=workspace, + extended=True, + additional_xmla_properties=["Parent.DataCategory", "Parent.IsHidden"], + ) + dfC = dfC[~dfC["Column Name"].str.startswith("RowNumber-")] + + dfM = fabric.list_measures( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["Parent.IsHidden"], + ) + dfR = fabric.list_relationships( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["FromCardinality", "ToCardinality"], + ) + dfP = fabric.list_partitions( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["DataCoverageDefinition.Expression"], + ) + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfRole = fabric.get_roles(dataset=dataset, workspace=workspace) + dfRM = fabric.get_roles(dataset=dataset, workspace=workspace, include_members=True) + dfRLS = fabric.get_row_level_security_permissions( + dataset=dataset, workspace=workspace + ) + # dfTr = fabric.list_translations(dataset = datasetName, workspace = workspaceName) + # dfE = fabric.list_expressions(dataset = datasetName, workspace = workspaceName) + dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace) + # dfDS = fabric.list_datasources(dataset = datasetName, workspace = workspaceName) + # dfPersp = fabric.list_perspectives(dataset = datasetName, workspace = workspaceName) + dfD = fabric.list_datasets(mode="rest", workspace=workspace) + dfD = dfD[dfD["Dataset Name"] == dataset] + # datasetOwner = dfD['Configured By'].iloc[0] + md = get_measure_dependencies(dataset, workspace) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) + dfC["Is Direct Lake"] = isDirectLake + dfT["Is Direct Lake"] = isDirectLake + + cols = ["From Cardinality", "To Cardinality"] + + for col in cols: + if not col in dfR: + dfR[col] = None + + cols = ["Parent Is Hidden"] + + for col in cols: + if not col in dfM: + dfM[col] = None + + # Data Coverage Definition rule + dfP_imp = dfP[dfP["Mode"] == "Import"] + dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True) + dfP = pd.merge( + dfP, dfTP[["Table Name", "Import Partitions"]], on="Table Name", how="left" + ) + dfP["Import Partitions"].fillna(0, inplace=True) + dfC_DateKey = dfC[ + (dfC["Parent Data Category"] == "Time") + & (dfC["Data Type"] == "DateTime") + & (dfC["Key"]) + ] + hasDateTable = False + + if len(dfC_DateKey) > 0: + hasDateTable = True + + dfP["Has Date Table"] = hasDateTable + + # Set dims to dual mode + dfR_one = dfR[dfR["To Cardinality"] == "One"] + dfTP = dfP_imp.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Import Partitions"}, inplace=True) + dfT = pd.merge(dfT, dfTP, left_on="Name", right_on="Table Name", how="left") + dfT.drop(columns=["Table Name"], inplace=True) + dfT["Import Partitions"].fillna(0, inplace=True) + hasDQ = any(r["Mode"] == "DirectQuery" for i, r in dfP.iterrows()) + dfT["Model Has DQ"] = hasDQ + 
dfT["Used in Relationship x:1"] = dfT["Name"].isin(dfR_one["To Table"]) + + dfF = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" + SELECT [FUNCTION_NAME] + FROM $SYSTEM.MDSCHEMA_FUNCTIONS + WHERE [INTERFACE_NAME] = 'DATETIME' + """, + ) + + dfC["Name"] = dfC["Column Name"] + dfH["Name"] = dfH["Hierarchy Name"] + dfM["Name"] = dfM["Measure Name"] + dfP["Name"] = dfP["Partition Name"] + dfRole["Name"] = dfRole["Role"] + dfD["Name"] = dfD["Dataset Name"] + dfH["Description"] = dfH["Hierarchy Description"] + dfM["Description"] = dfM["Measure Description"] + dfH["Hierarchy Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Hierarchy Name"] + ) + + dfCI["Calculation Object"] = format_dax_object_name( + dfCI["Calculation Group Name"], dfCI["Calculation Item Name"] + ) + + dfRole["Member Count"] = dfRM["Role"].isin(dfRole["Role"]).sum() + dfRLS["Is Dynamic"] = dfRLS["Filter Expression"].str.contains( + r"userprincipalname\s*\(", case=False + ) | dfRLS["Filter Expression"].str.contains(r"username\s*\(", case=False) + + # Partition Count + partition_count = ( + dfP.groupby("Table Name").size().reset_index(name="Partition Count") + ) + dfT = pd.merge( + dfT, partition_count, left_on="Name", right_on="Table Name", how="left" + ).drop("Table Name", axis=1) + dfT["Partition Count"] = dfT["Partition Count"].fillna(0).astype(int) + + dfT = dfT.merge( + dfP[["Table Name", "Partition Name"]], + how="left", + left_on="Name", + right_on="Table Name", + ) + dfT["First Partition Name"] = dfT.groupby("Name")["Partition Name"].transform( + "first" + ) + dfT.drop("Table Name", axis=1, inplace=True) + + dfC["Sort By Column Object"] = format_dax_object_name( + dfC["Table Name"], dfC["Sort By Column"] + ) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfM["Measure Object"] = "[" + dfM["Measure Name"] + "]" + dfM["Measure Fully Qualified"] = format_dax_object_name( + dfM["Table Name"], dfM["Measure Name"] + ) + dfM["Measure Fully Qualified No Spaces"] = ( + dfM["Table Name"] + "[" + dfM["Measure Name"] + "]" + ) + # dfM['Measure Fully Qualified No Spaces'] = dfM.apply(lambda row: row['Table Name'] + '[' + row['Measure Name'] + ']' if ' ' not in row['Table Name'] else '', axis=1) + dfC["Column Unqualified"] = "[" + dfC["Column Name"] + "]" + dfC["Column Object No Spaces"] = dfC.apply( + lambda row: ( + row["Table Name"] + "[" + row["Column Name"] + "]" + if " " not in row["Table Name"] + else "" + ), + axis=1, + ) + dfC["Used in Sort By"] = dfC["Column Object"].isin(dfC["Sort By Column Object"]) + dfH["Column Object"] = format_dax_object_name(dfH["Table Name"], dfH["Column Name"]) + dfC["Used in Hierarchy"] = dfC["Column Object"].isin(dfH["Column Object"]) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + dfT["Used in Relationship"] = dfT["Name"].isin(dfR["From Table"]) | dfT[ + "Name" + ].isin(dfR["To Table"]) + dfT["Used in Relationship Both Sides"] = dfT["Name"].isin(dfR["From Table"]) & dfT[ + "Name" + ].isin(dfR["To Table"]) + dfC["Used in Relationship"] = dfC["Column Object"].isin(dfR["From Object"]) | dfC[ + "Column Object" + ].isin(dfR["To Object"]) + + dfR_filt = dfR[ + (dfR["Cross Filtering Behavior"] == "BothDirections") + | (dfR["Multiplicity"] == "m:m") + ] + dfC["Used in M2M/BiDi Relationship"] = dfC["Column Object"].isin( + dfR_filt["From Object"] + ) | dfC["Column Object"].isin(dfR_filt["To Object"]) + 
dfC["Foreign Key"] = dfC["Column Object"].isin( + dfR[dfR["From Cardinality"] == "Many"]["From Object"] + ) + dfC["Primary Key"] = dfC["Column Object"].isin( + dfR[dfR["To Cardinality"] == "One"]["To Object"] + ) + dfT["Used in M2M Relationship"] = dfT["Name"].isin( + dfR[dfR["Multiplicity"] == "m:m"][["From Table"]] + ) | dfT["Name"].isin(dfR[dfR["Multiplicity"] == "m:m"][["To Table"]]) + dfT["Used in Dynamic RLS"] = dfT["Name"].isin(dfRLS[dfRLS["Is Dynamic"]]["Table"]) + dfT["Used in RLS"] = dfT["Name"].isin( + dfRLS.loc[dfRLS["Filter Expression"].str.len() > 0, "Table"] + ) + dfC["Primary Key"] = dfC["Column Object"].isin( + dfR.loc[dfR["To Cardinality"] == "One", "To Object"] + ) + dfD["Has Date Table"] = any( + (r["Parent Data Category"] == "Time") + & (r["Data Type"] == "DateTime") + & (r["Key"] == True) + for i, r in dfC.iterrows() + ) + # dfC['In Date Table'] = dfC['Table Name'].isin(dfT.loc[dfT['Data Category'] == "Time", 'Name']) + dfD["Relationship Count"] = len(dfR) + dfD["M2M or BiDi Relationship Count"] = len( + dfR[ + (dfR["Multiplicity"] == "m:m") + | (dfR["Cross Filtering Behavior"] == "BothDirections") + ] + ) + dfD["Calculation Group Count"] = len(dfT[dfT["Type"] == "Calculation Group"]) + dfT["Has Calculation Items"] = np.where( + (dfT["Type"] == "Calculation Group") + & dfT["Name"].isin(dfCI["Calculation Group Name"]), + True, + False, + ) + dfP["Partition Object"] = format_dax_object_name( + dfP["Table Name"], dfP["Partition Name"] + ) + dfRLS["RLS Object"] = format_dax_object_name(dfRLS["Role"], dfRLS["Table"]) + + function_pattern = "|".join(dfF["FUNCTION_NAME"].map(re.escape)) + + dfM["DQ Date Function Used"] = any(dfP["Mode"] == "DirectQuery") & dfM[ + "Measure Expression" + ].str.contains(f"({function_pattern})\\s*\\(", case=False, regex=True) + + md["Reference"] = ( + "'" + md["Referenced Table"] + "'[" + md["Referenced Object"] + "]" + ) + + dfC["Referenced By"] = ( + md[ + (md["Referenced Object Type"] == "Column") + & (md["Reference"].isin(dfC["Column Object"])) + ] + .groupby("Reference") + .size() + .reset_index(name="Count")["Count"] + ) + dfC["Referenced By"].fillna(0, inplace=True) + dfC["Referenced By"] = dfC["Referenced By"].fillna(0).astype(int) + + dfM["Referenced By"] = ( + md[ + (md["Referenced Object Type"] == "Measure") + & (md["Referenced Object"].isin(dfM["Measure Name"])) + ] + .groupby("Referenced Object") + .size() + .reset_index(name="Count")["Count"] + ) + dfM["Referenced By"].fillna(0, inplace=True) + dfM["Referenced By"] = dfM["Referenced By"].fillna(0).astype(int) + + pattern = "[^\( ][a-zA-Z0-9_()-]+\[[^\[]+\]|'[^']+'\[[^\[]+\]|\[[^\[]+\]" + + dfM["Has Fully Qualified Measure Reference"] = False + dfM["Has Unqualified Column Reference"] = False + + for i, r in dfM.iterrows(): + tName = r["Table Name"] + mName = r["Measure Name"] + expr = r["Measure Expression"] + + matches = re.findall(pattern, expr) + + for m in matches: + if m[0] == "[": + if (m in dfC["Column Unqualified"].values) and ( + dfC[dfC["Table Name"] == tName]["Column Unqualified"] == m + ).any(): + dfM.at[i, "Has Unqualified Column Reference"] = True + else: + if (m in dfM["Measure Fully Qualified"].values) | ( + m in dfM["Measure Fully Qualified No Spaces"].values + ): + dfM.at[i, "Has Fully Qualified Measure Reference"] = True + + dfR["Inactive without USERELATIONSHIP"] = False + for i, r in dfR[dfR["Active"] == False].iterrows(): + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + + dfM_filt = dfM[ + 
dfM["Measure Expression"].str.contains( + "(?i)USERELATIONSHIP\s*\(\s*'*" + + fromTable + + "'*\[" + + fromColumn + + "\]\s*,\s*'*" + + toTable + + "'*\[" + + toColumn + + "\]", + regex=True, + ) + ] + if len(dfM_filt) == 0: + dfR.at[i, "Inactive without USERELATIONSHIP"] = True + + dfC["Used in RLS"] = ( + dfC["Column Object No Spaces"].isin(dfRLS["Filter Expression"]) + | dfC["Column Object"].isin(dfRLS["Filter Expression"]) + | dfC.apply( + lambda row: any( + row["Column Name"] in expr + for expr in dfRLS.loc[ + dfRLS["Table"] == row["Table Name"], "Filter Expression" + ] + ), + axis=1, + ) + ) + + # Merge dfR and dfC based on 'From Object' and 'Column Object' + merged_from = pd.merge( + dfR, dfC, left_on="From Object", right_on="Column Object", how="left" + ) + merged_to = pd.merge( + dfR, dfC, left_on="To Object", right_on="Column Object", how="left" + ) + + dfR["From Column Data Type"] = merged_from["Data Type"] + dfR["To Column Data Type"] = merged_to["Data Type"] + + # Check if USERELATIONSHIP objects are used in a given column, table + userelationship_pattern = re.compile( + r"USERELATIONSHIP\s*\(\s*(.*?)\s*,\s*(.*?)\s*\)", re.DOTALL | re.IGNORECASE + ) + + # Function to extract objects within USERELATIONSHIP function + def extract_objects(measure_expression): + matches = userelationship_pattern.findall(measure_expression) + if matches: + return [obj.strip() for match in matches for obj in match] + else: + return [] + + dfM["USERELATIONSHIP Objects"] = dfM["Measure Expression"].apply(extract_objects) + flat_object_list = [ + item for sublist in dfM["USERELATIONSHIP Objects"] for item in sublist + ] + dfC["USERELATIONSHIP Used"] = dfC["Column Object"].isin(flat_object_list) | dfC[ + "Column Object No Spaces" + ].isin(flat_object_list) + dfT["USERELATIONSHIP Used"] = dfT["Name"].isin( + dfC[dfC["USERELATIONSHIP Used"]]["Table Name"] + ) + dfR["Relationship Name"] = ( + format_dax_object_name(dfR["From Table"], dfR["From Column"]) + + " -> " + + format_dax_object_name(dfR["To Table"], dfR["To Column"]) + ) + dfH = dfH[ + [ + "Name", + "Description", + "Table Name", + "Hierarchy Name", + "Hierarchy Description", + "Hierarchy Object", + ] + ].drop_duplicates() + + scope_to_dataframe = { + "Table": (dfT, ["Name"]), + "Partition": (dfP, ["Partition Object"]), + "Column": (dfC, ["Column Object"]), + "Hierarchy": (dfH, ["Hierarchy Object"]), + "Measure": (dfM, ["Measure Name"]), + "Calculation Item": (dfCI, ["Calculation Object"]), + "Relationship": (dfR, ["Relationship Name"]), + "Row Level Security": (dfRLS, ["RLS Object"]), + "Role": (dfRole, ["Role"]), + "Model": (dfD, ["Dataset Name"]), + } + + def execute_rule(row): + scopes = row["Scope"] + + # support both str and list as scope type + if isinstance(scopes, str): + scopes = [scopes] + + # collect output dataframes + df_outputs = [] + + for scope in scopes: + # common fields for each scope + (df, violation_cols_or_func) = scope_to_dataframe[scope] + + if scope in ["Hierarchy", "Measure"] and len(df) == 0: + continue + # execute rule and subset df + df_violations = df[row["Expression"](df)] + + # subset the right output columns (e.g. 
Table Name & Column Name) + if isinstance(violation_cols_or_func, list): + violation_func = lambda violations: violations[violation_cols_or_func] + else: + violation_func = violation_cols_or_func + + # build output data frame + df_output = violation_func(df_violations).copy() + + df_output.columns = ["Object Name"] + df_output["Rule Name"] = row["Rule Name"] + df_output["Category"] = row["Category"] + + df_output["Object Type"] = scope + df_output["Severity"] = row["Severity"] + df_output["Description"] = row["Description"] + df_output["URL"] = row["URL"] + + df_outputs.append(df_output) + + return df_outputs + + # flatten list of lists + flatten_dfs = [ + df for dfs in rules_dataframe.apply(execute_rule, axis=1).tolist() for df in dfs + ] + + finalDF = pd.concat(flatten_dfs, ignore_index=True) + + if export: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the Best Practice Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + dfExport = finalDF.copy() + delta_table_name = "modelbparesults" + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) + + lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name] + + dfExport["Severity"].replace("⚠️", "Warning", inplace=True) + dfExport["Severity"].replace("\u274C", "Error", inplace=True) + dfExport["Severity"].replace("ℹ️", "Info", inplace=True) + + spark = SparkSession.builder.getOrCreate() + query = f"SELECT MAX(RunId) FROM {lakehouse}.{delta_table_name}" + + if len(lakeT_filt) == 0: + runId = 1 + else: + dfSpark = spark.sql(query) + maxRunId = dfSpark.collect()[0][0] + runId = maxRunId + 1 + + now = datetime.datetime.now() + dfExport["Workspace Name"] = workspace + dfExport["Dataset Name"] = dataset + dfExport["Timestamp"] = now + dfExport["RunId"] = runId + + dfExport["RunId"] = dfExport["RunId"].astype("int") + + colName = "Workspace Name" + dfExport.insert(0, colName, dfExport.pop(colName)) + colName = "Dataset Name" + dfExport.insert(1, colName, dfExport.pop(colName)) + + dfExport.columns = dfExport.columns.str.replace(" ", "_") + spark_df = spark.createDataFrame(dfExport) + spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name) + print( + f"\u2022 Model Best Practice Analyzer results for the '{dataset}' semantic model have been appended to the '{delta_table_name}' delta table." + ) + + if return_dataframe: + return finalDF + + pd.set_option("display.max_colwidth", 100) + + finalDF = ( + finalDF[ + [ + "Category", + "Rule Name", + "Object Type", + "Object Name", + "Severity", + "Description", + "URL", + ] + ] + .sort_values(["Category", "Rule Name", "Object Type", "Object Name"]) + .set_index(["Category", "Rule Name"]) + ) + + bpa2 = finalDF.reset_index() + bpa_dict = { + cat: bpa2[bpa2["Category"] == cat].drop("Category", axis=1) + for cat in bpa2["Category"].drop_duplicates().values + } + + styles = """ + + """ + + # JavaScript for tab functionality + script = """ + + """ + + # JavaScript for dynamic tooltip positioning + dynamic_script = """ + + """ + + # HTML for tabs + tab_html = '
' + content_html = "" + for i, (title, df) in enumerate(bpa_dict.items()): + if df.shape[0] == 0: + continue + + tab_id = f"tab{i}" + active_class = "" + if i == 0: + active_class = "active" + + summary = " + ".join( + [f"{idx} ({v})" for idx, v in df["Severity"].value_counts().items()] + ) + tab_html += f'' + content_html += f'
' + + # Adding tooltip for Rule Name using Description column + content_html += '' + content_html += "" + for _, row in df.iterrows(): + content_html += f"" + if pd.notnull(row["URL"]): + content_html += f'' + elif pd.notnull(row["Description"]): + content_html += f'' + else: + content_html += f'' + content_html += f'' + content_html += f'' + content_html += f'' + content_html += f"" + content_html += "
Rule NameObject TypeObject NameSeverity
{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Description"]}{row["Rule Name"]}{row["Object Type"]}{row["Object Name"]}{row["Severity"]}
" + + content_html += "
" + tab_html += "
" + + # Display the tabs, tab contents, and run the script + return display(HTML(styles + tab_html + content_html + script)) diff --git a/src/sempy_labs/_model_dependencies.py b/src/sempy_labs/_model_dependencies.py new file mode 100644 index 00000000..d1e56789 --- /dev/null +++ b/src/sempy_labs/_model_dependencies.py @@ -0,0 +1,341 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import format_dax_object_name +from typing import List, Optional, Union +from anytree import Node, RenderTree +from sempy._utils._log import log + + +def get_measure_dependencies(dataset: str, workspace: Optional[str] = None): + """ + Shows all dependencies for all measures in a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + Shows all dependencies for all measures in the semantic model. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" + SELECT + [TABLE] AS [Table Name] + ,[OBJECT] AS [Object Name] + ,[OBJECT_TYPE] AS [Object Type] + ,[REFERENCED_TABLE] AS [Referenced Table] + ,[REFERENCED_OBJECT] AS [Referenced Object] + ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] + FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY + WHERE [OBJECT_TYPE] = 'MEASURE' + """, + ) + + dep["Object Type"] = dep["Object Type"].str.capitalize() + dep["Referenced Object Type"] = dep["Referenced Object Type"].str.capitalize() + + dep["Full Object Name"] = format_dax_object_name( + dep["Table Name"], dep["Object Name"] + ) + dep["Referenced Full Object Name"] = format_dax_object_name( + dep["Referenced Table"], dep["Referenced Object"] + ) + dep["Parent Node"] = dep["Object Name"] + + df = dep + + df["Done"] = df.apply( + lambda row: False if row["Referenced Object Type"] == "Measure" else True, + axis=1, + ) + + while any(df["Done"] == False): + for i, r in df.iterrows(): + rObjFull = r["Referenced Full Object Name"] + rObj = r["Referenced Object"] + if r["Done"] == False: + dep_filt = dep[dep["Full Object Name"] == rObjFull] + + for index, dependency in dep_filt.iterrows(): + d = True + if dependency[5] == "Measure": + d = False + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[4], + "Referenced Table": dependency[3], + "Referenced Object Type": dependency[5], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) + else: + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[5], + "Referenced Table": dependency[4], + "Referenced Object Type": dependency[6], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) + + df.loc[i, "Done"] = True + + df = df.drop(["Done", "Full Object Name", "Referenced Full Object 
Name"], axis=1) + + return df + + +def get_model_calc_dependencies(dataset: str, workspace: Optional[str] = None): + """ + Shows all dependencies for all objects in a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + Shows all dependencies for all objects in the semantic model. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dep = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" + SELECT + [TABLE] AS [Table Name] + ,[OBJECT] AS [Object Name] + ,[OBJECT_TYPE] AS [Object Type] + ,[EXPRESSION] AS [Expression] + ,[REFERENCED_TABLE] AS [Referenced Table] + ,[REFERENCED_OBJECT] AS [Referenced Object] + ,[REFERENCED_OBJECT_TYPE] AS [Referenced Object Type] + FROM $SYSTEM.DISCOVER_CALC_DEPENDENCY + """, + ) + + dep["Object Type"] = dep["Object Type"].str.replace("_", " ").str.title() + dep["Referenced Object Type"] = ( + dep["Referenced Object Type"].str.replace("_", " ").str.title() + ) + + dep["Full Object Name"] = format_dax_object_name( + dep["Table Name"], dep["Object Name"] + ) + dep["Referenced Full Object Name"] = format_dax_object_name( + dep["Referenced Table"], dep["Referenced Object"] + ) + dep["Parent Node"] = dep["Object Name"] + + df = dep + + objs = ["Measure", "Calc Column", "Calculation Item", "Calc Table"] + + df["Done"] = df.apply( + lambda row: False if row["Referenced Object Type"] in objs else True, axis=1 + ) + + while any(df["Done"] == False): + for i, r in df.iterrows(): + rObjFull = r["Referenced Full Object Name"] + rObj = r["Referenced Object"] + if r["Done"] == False: + dep_filt = dep[dep["Full Object Name"] == rObjFull] + + for index, dependency in dep_filt.iterrows(): + d = True + if dependency[5] in objs: + d = False + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[4], + "Referenced Table": dependency[3], + "Referenced Object Type": dependency[5], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) + else: + df = pd.concat( + [ + df, + pd.DataFrame( + [ + { + "Table Name": r["Table Name"], + "Object Name": r["Object Name"], + "Object Type": r["Object Type"], + "Referenced Object": dependency[5], + "Referenced Table": dependency[4], + "Referenced Object Type": dependency[6], + "Done": d, + "Full Object Name": r["Full Object Name"], + "Referenced Full Object Name": dependency[ + 7 + ], + "Parent Node": rObj, + } + ] + ), + ], + ignore_index=True, + ) + + df.loc[i, "Done"] = True + + df = df.drop(["Done"], axis=1) + + return df + + +@log +def measure_dependency_tree( + dataset: str, measure_name: str, workspace: Optional[str] = None +): + """ + Prints a measure dependency tree of all dependent objects for a measure in a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + measure_name : str + Name of the measure. + workspace : str, default=None + The Fabric workspace name. 
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfM_filt = dfM[dfM["Measure Name"] == measure_name] + + if len(dfM_filt) == 0: + print( + f"The '{measure_name}' measure does not exist in the '{dataset}' semantic model in the '{workspace}' workspace." + ) + return + + md = get_measure_dependencies(dataset, workspace) + df_filt = md[md["Object Name"] == measure_name] + + # Create a dictionary to hold references to nodes + node_dict = {} + measureIcon = "\u2211" + tableIcon = "\u229E" + columnIcon = "\u229F" + + # Populate the tree + for _, row in df_filt.iterrows(): + # measure_name = row['Object Name'] + ref_obj_table_name = row["Referenced Table"] + ref_obj_name = row["Referenced Object"] + ref_obj_type = row["Referenced Object Type"] + parent_node_name = row["Parent Node"] + + # Create or get the parent node + parent_node = node_dict.get(parent_node_name) + if parent_node is None: + parent_node = Node(parent_node_name) + node_dict[parent_node_name] = parent_node + parent_node.custom_property = measureIcon + " " + + # Create the child node + child_node_name = ref_obj_name + child_node = Node(child_node_name, parent=parent_node) + if ref_obj_type == "Column": + child_node.custom_property = columnIcon + " '" + ref_obj_table_name + "'" + elif ref_obj_type == "Table": + child_node.custom_property = tableIcon + " " + elif ref_obj_type == "Measure": + child_node.custom_property = measureIcon + " " + + # Update the dictionary with the child node + node_dict[child_node_name] = child_node + + # Visualize the tree structure using RenderTree + for pre, _, node in RenderTree(node_dict[measure_name]): + if tableIcon in node.custom_property: + print(f"{pre}{node.custom_property}'{node.name}'") + else: + print(f"{pre}{node.custom_property}[{node.name}]") diff --git a/src/sempy_labs/_one_lake_integration.py b/src/sempy_labs/_one_lake_integration.py new file mode 100644 index 00000000..cf22e4f7 --- /dev/null +++ b/src/sempy_labs/_one_lake_integration.py @@ -0,0 +1,155 @@ +import sempy.fabric as fabric +import pandas as pd +from typing import Optional +from sempy._utils._log import log +from sempy_labs._helper_functions import resolve_workspace_name_and_id + + +@log +def export_model_to_onelake( + dataset: str, + workspace: Optional[str] = None, + destination_lakehouse: Optional[str] = None, + destination_workspace: Optional[str] = None, +): + """ + Exports a semantic model's tables to delta tables in the lakehouse. Creates shortcuts to the tables if a lakehouse is specified. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + destination_lakehouse : str, default=None + The name of the Fabric lakehouse where shortcuts will be created to access the delta tables created by the export. If the lakehouse specified does not exist, one will be created with that name. If no lakehouse is specified, shortcuts will not be created. + destination_workspace : str, default=None + The name of the Fabric workspace in which the lakehouse resides. 
+ """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if destination_workspace == None: + destination_workspace = workspace + destination_workspace_id = workspace_id + else: + destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) + + dfD = fabric.list_datasets(workspace=workspace) + dfD_filt = dfD[dfD["Dataset Name"] == dataset] + + if len(dfD_filt) == 0: + print( + f"The '{dataset}' semantic model does not exist in the '{workspace}' workspace." + ) + return + + tmsl = f""" + {{ + 'export': {{ + 'layout': 'delta', + 'type': 'full', + 'objects': [ + {{ + 'database': '{dataset}' + }} + ] + }} + }} + """ + + # Export model's tables as delta tables + try: + fabric.execute_tmsl(script=tmsl, workspace=workspace) + print( + f"The '{dataset}' semantic model's tables have been exported as delta tables to the '{workspace}' workspace.\n" + ) + except: + print( + f"ERROR: The '{dataset}' semantic model's tables have not been exported as delta tables to the '{workspace}' workspace." + ) + print( + f"Make sure you enable OneLake integration for the '{dataset}' semantic model. Follow the instructions here: https://learn.microsoft.com/power-bi/enterprise/onelake-integration-overview#enable-onelake-integration" + ) + return + + # Create shortcuts if destination lakehouse is specified + if destination_lakehouse is not None: + # Destination... + dfI_Dest = fabric.list_items(workspace=destination_workspace, type="Lakehouse") + dfI_filt = dfI_Dest[(dfI_Dest["Display Name"] == destination_lakehouse)] + + if len(dfI_filt) == 0: + print( + f"The '{destination_lakehouse}' lakehouse does not exist within the '{destination_workspace}' workspace." + ) + # Create lakehouse + destination_lakehouse_id = fabric.create_lakehouse( + display_name=destination_lakehouse, workspace=destination_workspace + ) + print( + f"The '{destination_lakehouse}' lakehouse has been created within the '{destination_workspace}' workspace.\n" + ) + else: + destination_lakehouse_id = dfI_filt["Id"].iloc[0] + + # Source... 
+ dfI_Source = fabric.list_items(workspace=workspace, type="SemanticModel") + dfI_filtSource = dfI_Source[(dfI_Source["Display Name"] == dataset)] + sourceLakehouseId = dfI_filtSource["Id"].iloc[0] + + # Valid tables + dfP = fabric.list_partitions( + dataset=dataset, + workspace=workspace, + additional_xmla_properties=["Parent.SystemManaged"], + ) + dfP_filt = dfP[ + (dfP["Mode"] == "Import") + & (dfP["Source Type"] != "CalculationGroup") + & (dfP["Parent System Managed"] == False) + ] + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + tmc = pd.DataFrame(dfP.groupby("Table Name")["Mode"].nunique()).reset_index() + oneMode = tmc[tmc["Mode"] == 1] + tableAll = dfP_filt[ + dfP_filt["Table Name"].isin(dfC["Table Name"].values) + & (dfP_filt["Table Name"].isin(oneMode["Table Name"].values)) + ] + tables = tableAll["Table Name"].unique() + + client = fabric.FabricRestClient() + + print("Creating shortcuts...\n") + for tableName in tables: + tablePath = "Tables/" + tableName + shortcutName = tableName.replace(" ", "") + request_body = { + "path": "Tables", + "name": shortcutName, + "target": { + "oneLake": { + "workspaceId": workspace_id, + "itemId": sourceLakehouseId, + "path": tablePath, + } + }, + } + + try: + response = client.post( + f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts", + json=request_body, + ) + if response.status_code == 201: + print( + f"\u2022 The shortcut '{shortcutName}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace}' workspace. It is based on the '{tableName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.\n" + ) + else: + print(response.status_code) + except: + print( + f"ERROR: Failed to create a shortcut for the '{tableName}' table." + ) diff --git a/sempy_labs/QSO.py b/src/sempy_labs/_query_scale_out.py similarity index 50% rename from sempy_labs/QSO.py rename to src/sempy_labs/_query_scale_out.py index a685e94b..d38a7b2c 100644 --- a/sempy_labs/QSO.py +++ b/src/sempy_labs/_query_scale_out.py @@ -1,16 +1,12 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .HelperFunctions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union +import sempy_labs._icons as icons -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' def qso_sync(dataset: str, workspace: Optional[str] = None): - """ Triggers a query scale-out sync of read-only replicas for the specified dataset from the specified workspace. @@ -22,14 +18,13 @@ def qso_sync(dataset: str, workspace: Optional[str] = None): The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- + Returns ------- - - """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group + """ + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/trigger-query-scale-out-sync-in-group if workspace is None: workspace_id = fabric.get_workspace_id() @@ -40,15 +35,21 @@ def qso_sync(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) client = fabric.PowerBIRestClient() - response = client.post(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/sync") + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/sync" + ) if response.status_code == 200: - print(f"{green_dot} QSO sync initiated for the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.green_dot} QSO sync initiated for the '{dataset}' semantic model within the '{workspace}' workspace." + ) else: - print(f"{red_dot} QSO sync failed for the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.red_dot} QSO sync failed for the '{dataset}' semantic model within the '{workspace}' workspace." + ) -def qso_sync_status(dataset: str, workspace: Optional[str] = None): +def qso_sync_status(dataset: str, workspace: Optional[str] = None): """ Returns the query scale-out sync status for the specified dataset from the specified workspace. @@ -63,14 +64,28 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): Returns ------- - - """ - - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group - df = pd.DataFrame(columns=['Scale Out Status', 'Sync Start Time', 'Sync End Time', 'Commit Version', 'Commit Timestamp', 'Target Sync Version', 'Target Sync Timestamp', 'Trigger Reason', 'Min Active Read Version', 'Min Active Read Timestamp']) - dfRep = pd.DataFrame(columns=['Replica ID', 'Replica Type', 'Replica Version', 'Replica Timestamp']) + """ + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-query-scale-out-sync-status-in-group + + df = pd.DataFrame( + columns=[ + "Scale Out Status", + "Sync Start Time", + "Sync End Time", + "Commit Version", + "Commit Timestamp", + "Target Sync Version", + "Target Sync Timestamp", + "Trigger Reason", + "Min Active Read Version", + "Min Active Read Timestamp", + ] + ) + dfRep = pd.DataFrame( + columns=["Replica ID", "Replica Type", "Replica Version", "Replica Timestamp"] + ) if workspace is None: workspace_id = fabric.get_workspace_id() @@ -81,29 +96,51 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) client = fabric.PowerBIRestClient() - response = client.get(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/syncStatus") + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/queryScaleOut/syncStatus" + ) if response.status_code == 200: o = response.json() - sos = o['scaleOutStatus'] - - if sos == 'Enabled': - new_data = {'Scale Out Status': o['scaleOutStatus'], 'Sync Start Time': o['syncStartTime'], 'Sync End Time': o['syncEndTime'], 'Commit Version': o['commitVersion'], 'Commit Timestamp': o['commitTimestamp'], 'Target Sync Version': o['targetSyncVersion'], 'Target Sync Timestamp': o['targetSyncTimestamp'], 'Trigger Reason': o['triggerReason'], 'Min Active Read Version': o['minActiveReadVersion'], 'Min Active Read Timestamp': o['minActiveReadTimestamp']} + sos = o["scaleOutStatus"] + 
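When query scale-out is enabled, `qso_sync_status` returns a pair of dataframes: one row of overall sync status plus one row per read-only replica. A hedged usage sketch follows; the model name is a placeholder, and the defensive tuple check covers the branches (scale-out disabled, non-200 response) whose bodies are not shown in this hunk.

```python
from sempy_labs._query_scale_out import qso_sync, qso_sync_status

dataset = "AdventureWorks"  # placeholder model name
workspace = None            # None resolves to the notebook's workspace

# Kick off a read-only replica sync, then inspect its status.
qso_sync(dataset=dataset, workspace=workspace)

result = qso_sync_status(dataset=dataset, workspace=workspace)
if isinstance(result, tuple):
    df_status, df_replicas = result  # scale-out enabled: (status df, replicas df)
    print(df_status[["Scale Out Status", "Sync Start Time", "Sync End Time"]])
    print(df_replicas)
else:
    # Scale-out not enabled, or the REST call failed; the function reports that instead.
    print(result)
```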
+ if sos == "Enabled": + new_data = { + "Scale Out Status": o["scaleOutStatus"], + "Sync Start Time": o["syncStartTime"], + "Sync End Time": o["syncEndTime"], + "Commit Version": o["commitVersion"], + "Commit Timestamp": o["commitTimestamp"], + "Target Sync Version": o["targetSyncVersion"], + "Target Sync Timestamp": o["targetSyncTimestamp"], + "Trigger Reason": o["triggerReason"], + "Min Active Read Version": o["minActiveReadVersion"], + "Min Active Read Timestamp": o["minActiveReadTimestamp"], + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - for r in o['scaleOutReplicas']: - new_data = {'Replica ID': r['replicaId'], 'Replica Type': r['replicaType'], 'Replica Version': str(r['replicaVersion']), 'Replica Timestamp': r['replicaTimestamp']} - dfRep = pd.concat([dfRep, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - df['Sync Start Time'] = pd.to_datetime(df['Sync Start Time']) - df['Sync End Time'] = pd.to_datetime(df['Sync End Time']) - df['Commit Timestamp'] = pd.to_datetime(df['Commit Timestamp']) - df['Target Sync Timestamp'] = pd.to_datetime(df['Target Sync Timestamp']) - df['Min Active Read Timestamp'] = pd.to_datetime(df['Min Active Read Timestamp']) - dfRep['Replica Timestamp'] = pd.to_datetime(dfRep['Replica Timestamp']) - df['Commit Version'] = df['Commit Version'].astype('int') - df['Target Sync Version'] = df['Target Sync Version'].astype('int') - df['Min Active Read Version'] = df['Min Active Read Version'].astype('int') + for r in o["scaleOutReplicas"]: + new_data = { + "Replica ID": r["replicaId"], + "Replica Type": r["replicaType"], + "Replica Version": str(r["replicaVersion"]), + "Replica Timestamp": r["replicaTimestamp"], + } + dfRep = pd.concat( + [dfRep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + df["Sync Start Time"] = pd.to_datetime(df["Sync Start Time"]) + df["Sync End Time"] = pd.to_datetime(df["Sync End Time"]) + df["Commit Timestamp"] = pd.to_datetime(df["Commit Timestamp"]) + df["Target Sync Timestamp"] = pd.to_datetime(df["Target Sync Timestamp"]) + df["Min Active Read Timestamp"] = pd.to_datetime( + df["Min Active Read Timestamp"] + ) + dfRep["Replica Timestamp"] = pd.to_datetime(dfRep["Replica Timestamp"]) + df["Commit Version"] = df["Commit Version"].astype("int") + df["Target Sync Version"] = df["Target Sync Version"].astype("int") + df["Min Active Read Version"] = df["Min Active Read Version"].astype("int") return df, dfRep else: @@ -112,8 +149,8 @@ def qso_sync_status(dataset: str, workspace: Optional[str] = None): else: return response.status_code -def disable_qso(dataset: str, workspace: Optional[str] = None): +def disable_qso(dataset: str, workspace: Optional[str] = None): """ Sets the max read-only replicas to 0, disabling query scale out. 
@@ -128,7 +165,7 @@ def disable_qso(dataset: str, workspace: Optional[str] = None): Returns ------- - + """ if workspace is None: @@ -139,23 +176,28 @@ def disable_qso(dataset: str, workspace: Optional[str] = None): dataset_id = resolve_dataset_id(dataset, workspace) - request_body = { - "queryScaleOutSettings": { - "maxReadOnlyReplicas": '0' - } - } + request_body = {"queryScaleOutSettings": {"maxReadOnlyReplicas": "0"}} client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json=request_body + ) if response.status_code == 200: - df = list_qso_settings(dataset = dataset, workspace = workspace) - print(f"{green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace.") + df = list_qso_settings(dataset=dataset, workspace=workspace) + print( + f"{icons.green_dot} Query scale out has been disabled for the '{dataset}' semantic model within the '{workspace}' workspace." + ) return df else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" -def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replicas: Optional[int] = -1, workspace: Optional[str] = None): +def set_qso( + dataset: str, + auto_sync: Optional[bool] = True, + max_read_only_replicas: Optional[int] = -1, + workspace: Optional[str] = None, +): """ Sets the query scale out settings for a semantic model. @@ -174,10 +216,10 @@ def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replic Returns ------- - + """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group + # https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/update-dataset-in-group if workspace is None: workspace_id = fabric.get_workspace_id() @@ -188,34 +230,46 @@ def set_qso(dataset: str, auto_sync: Optional[bool] = True, max_read_only_replic dataset_id = resolve_dataset_id(dataset, workspace) if max_read_only_replicas == 0: - disable_qso(dataset = dataset, workspace = workspace) + disable_qso(dataset=dataset, workspace=workspace) return request_body = { "queryScaleOutSettings": { "autoSyncReadOnlyReplicas": auto_sync, - "maxReadOnlyReplicas": str(max_read_only_replicas) + "maxReadOnlyReplicas": str(max_read_only_replicas), } } - ssm = set_semantic_model_storage_format(dataset = dataset, storage_format='Large', workspace=workspace) + ssm = set_semantic_model_storage_format( + dataset=dataset, storage_format="Large", workspace=workspace + ) if ssm == 200: client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", + json=request_body, + ) if response.status_code == 200: - df = list_qso_settings(dataset = dataset, workspace = workspace) - print(f"{green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace.") + df = list_qso_settings(dataset=dataset, workspace=workspace) + print( + f"{icons.green_dot} Query scale out has been set on the '{dataset}' semantic model within the '{workspace}' workspace." 
+ ) return df else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" else: - print(f"{red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out.") - print("https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites") + print( + f"{icons.red_dot} Failed to set the '{dataset}' semantic model within the '{workspace}' workspace to large semantic model storage format. This is a prerequisite for enabling Query Scale Out." + ) + print( + "https://learn.microsoft.com/power-bi/enterprise/service-premium-scale-out#prerequisites" + ) return - -def set_semantic_model_storage_format(dataset: str, storage_format: str, workspace: Optional[str] = None): +def set_semantic_model_storage_format( + dataset: str, storage_format: str, workspace: Optional[str] = None +): """ Sets the semantic model storage format. @@ -232,7 +286,7 @@ def set_semantic_model_storage_format(dataset: str, storage_format: str, workspa Returns ------- - + """ if workspace is None: @@ -245,35 +299,37 @@ def set_semantic_model_storage_format(dataset: str, storage_format: str, workspa storage_format = storage_format.capitalize() - if storage_format == 'Abf': - storage_format = 'Small' - elif storage_format.startswith('Premium'): - storage_format = 'Large' + if storage_format == "Abf": + storage_format = "Small" + elif storage_format.startswith("Premium"): + storage_format = "Large" - storageFormats = ['Small', 'Large'] + storageFormats = ["Small", "Large"] - if storage_format == 'Large': - request_body = { - "targetStorageMode": "PremiumFiles" - } - elif storage_format == 'Small': - request_body = { - "targetStorageMode": "Abf" - } + if storage_format == "Large": + request_body = {"targetStorageMode": "PremiumFiles"} + elif storage_format == "Small": + request_body = {"targetStorageMode": "Abf"} else: - print(f"{red_dot} Invalid storage format value. Valid options: {storageFormats}.") + print( + f"{icons.red_dot} Invalid storage format value. Valid options: {storageFormats}." + ) return client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json = request_body) + response = client.patch( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}", json=request_body + ) if response.status_code == 200: - return print(f"{green_dot} Semantic model storage format set to '{storage_format}'.") + return print( + f"{icons.green_dot} Semantic model storage format set to '{storage_format}'." + ) else: - return f"{red_dot} {response.status_code}" + return f"{icons.red_dot} {response.status_code}" -def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = None): +def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = None): """ Shows the query scale out settings for a semantic model (or all semantic models within a workspace). 
@@ -302,28 +358,48 @@ def list_qso_settings(dataset: Optional[str] = None, workspace: Optional[str] = dataset_id = resolve_dataset_id(dataset, workspace) workspace_id = fabric.get_workspace_id() - df = pd.DataFrame(columns=['Dataset Id', 'Dataset Name', 'Storage Mode', 'QSO Auto Sync Enabled', 'QSO Max Read Only Replicas']) + df = pd.DataFrame( + columns=[ + "Dataset Id", + "Dataset Name", + "Storage Mode", + "QSO Auto Sync Enabled", + "QSO Max Read Only Replicas", + ] + ) client = fabric.PowerBIRestClient() response = client.get(f"/v1.0/myorg/groups/{workspace_id}/datasets") - for v in response.json()['value']: - tsm = v['targetStorageMode'] - if tsm == 'Abf': - sm = 'Small' + for v in response.json()["value"]: + tsm = v["targetStorageMode"] + if tsm == "Abf": + sm = "Small" else: - sm = 'Large' - new_data = {'Dataset Id': v['id'], 'Dataset Name': v['name'], 'Storage Mode': sm, 'QSO Auto Sync Enabled': v['queryScaleOutSettings']['autoSyncReadOnlyReplicas'], 'QSO Max Read Only Replicas': v['queryScaleOutSettings']['maxReadOnlyReplicas'] } + sm = "Large" + new_data = { + "Dataset Id": v["id"], + "Dataset Name": v["name"], + "Storage Mode": sm, + "QSO Auto Sync Enabled": v["queryScaleOutSettings"][ + "autoSyncReadOnlyReplicas" + ], + "QSO Max Read Only Replicas": v["queryScaleOutSettings"][ + "maxReadOnlyReplicas" + ], + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - df['QSO Auto Sync Enabled'] = df['QSO Auto Sync Enabled'].astype('bool') - df['QSO Max Read Only Replicas'] = df['QSO Max Read Only Replicas'].astype('int') - + df["QSO Auto Sync Enabled"] = df["QSO Auto Sync Enabled"].astype("bool") + df["QSO Max Read Only Replicas"] = df["QSO Max Read Only Replicas"].astype("int") + if dataset is not None: - df = df[df['Dataset Id'] == dataset_id] - + df = df[df["Dataset Id"] == dataset_id] + return df -def set_workspace_default_storage_format(storage_format: str, workspace: Optional[str] = None): +def set_workspace_default_storage_format( + storage_format: str, workspace: Optional[str] = None +): """ Sets the default storage format for semantic models within a workspace. @@ -338,17 +414,19 @@ def set_workspace_default_storage_format(storage_format: str, workspace: Optiona Returns ------- - + """ - #https://learn.microsoft.com/en-us/rest/api/power-bi/groups/update-group#defaultdatasetstorageformat + # https://learn.microsoft.com/en-us/rest/api/power-bi/groups/update-group#defaultdatasetstorageformat - storageFormats = ['Small', 'Large'] + storageFormats = ["Small", "Large"] storage_format = storage_format.capitalize() if storage_format not in storageFormats: - print(f"Invalid storage format. Please choose from these options: {storageFormats}.") + print( + f"Invalid storage format. Please choose from these options: {storageFormats}." 
+ ) if workspace is None: workspace_id = fabric.get_workspace_id() @@ -356,15 +434,14 @@ def set_workspace_default_storage_format(storage_format: str, workspace: Optiona else: workspace_id = fabric.resolve_workspace_id(workspace) - request_body = { - "name": workspace, - "defaultDatasetStorageFormat": storage_format - } + request_body = {"name": workspace, "defaultDatasetStorageFormat": storage_format} client = fabric.PowerBIRestClient() - response = client.patch(f"/v1.0/myorg/groups/{workspace_id}", json = request_body) + response = client.patch(f"/v1.0/myorg/groups/{workspace_id}", json=request_body) if response.status_code == 200: - print(f"{green_dot} The default storage format for the '{workspace}' workspace has been updated to '{storage_format}.") + print( + f"{icons.green_dot} The default storage format for the '{workspace}' workspace has been updated to '{storage_format}." + ) else: - print(f"{red_dot} {response.status_code}") \ No newline at end of file + print(f"{icons.red_dot} {response.status_code}") diff --git a/sempy_labs/RefreshSemanticModel.py b/src/sempy_labs/_refresh_semantic_model.py similarity index 50% rename from sempy_labs/RefreshSemanticModel.py rename to src/sempy_labs/_refresh_semantic_model.py index 599bbb7f..a6b0fd95 100644 --- a/sempy_labs/RefreshSemanticModel.py +++ b/src/sempy_labs/_refresh_semantic_model.py @@ -1,18 +1,23 @@ import sempy import sempy.fabric as fabric import time -from .HelperFunctions import resolve_dataset_id +from sempy_labs._helper_functions import resolve_dataset_id from typing import List, Optional, Union from sempy._utils._log import log +import sempy_labs._icons as icons +from sempy_labs._helper_functions import resolve_workspace_name_and_id -green_dot = '\U0001F7E2' -yellow_dot = '\U0001F7E1' -red_dot = '\U0001F534' -in_progress = '⌛' @log -def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] = None, partitions: Optional[Union[str, List[str]]] = None, refresh_type: Optional[str] = None, retry_count: Optional[int] = 0, apply_refresh_policy: Optional[bool] = True, workspace: Optional[str] = None): - +def refresh_semantic_model( + dataset: str, + tables: Optional[Union[str, List[str]]] = None, + partitions: Optional[Union[str, List[str]]] = None, + refresh_type: Optional[str] = None, + retry_count: Optional[int] = 0, + apply_refresh_policy: Optional[bool] = True, + workspace: Optional[str] = None, +): """ Refreshes a semantic model. @@ -34,10 +39,6 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
- - Returns - ------- - """ if workspace == None: @@ -45,7 +46,7 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] workspace = fabric.resolve_workspace_name(workspace_id) if refresh_type is None: - refresh_type = 'full' + refresh_type = "full" if isinstance(tables, str): tables = [tables] @@ -57,6 +58,7 @@ def refresh_semantic_model(dataset: str, tables: Optional[Union[str, List[str]]] if tables is not None: objects = objects + [{"table": table} for table in tables] if partitions is not None: + def extract_names(partition): parts = partition.split("[") table_name = parts[0].strip("'") @@ -65,85 +67,118 @@ def extract_names(partition): objects = objects + [extract_names(partition) for partition in partitions] - refresh_type = refresh_type.lower().replace('only', 'Only').replace('values', 'Values') + refresh_type = ( + refresh_type.lower().replace("only", "Only").replace("values", "Values") + ) - refreshTypes = ['full', 'automatic', 'dataOnly', 'calculate', 'clearValues', 'defragment'] + refreshTypes = [ + "full", + "automatic", + "dataOnly", + "calculate", + "clearValues", + "defragment", + ] if refresh_type not in refreshTypes: - print(f"{red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}.") + print( + f"{icons.red_dot} Invalid refresh type. Refresh type must be one of these values: {refreshTypes}." + ) return - + if len(objects) == 0: - requestID = fabric.refresh_dataset(dataset = dataset, workspace = workspace, refresh_type = refresh_type, retry_count = retry_count, apply_refresh_policy = apply_refresh_policy) + requestID = fabric.refresh_dataset( + dataset=dataset, + workspace=workspace, + refresh_type=refresh_type, + retry_count=retry_count, + apply_refresh_policy=apply_refresh_policy, + ) else: - requestID = fabric.refresh_dataset(dataset = dataset, workspace = workspace, refresh_type = refresh_type, retry_count = retry_count, apply_refresh_policy = apply_refresh_policy, objects = objects) - print(f"{in_progress} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is in progress...") + requestID = fabric.refresh_dataset( + dataset=dataset, + workspace=workspace, + refresh_type=refresh_type, + retry_count=retry_count, + apply_refresh_policy=apply_refresh_policy, + objects=objects, + ) + print( + f"{icons.in_progress} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is in progress..." + ) if len(objects) != 0: print(objects) while True: - requestDetails = fabric.get_refresh_execution_details(dataset = dataset,refresh_request_id = requestID, workspace = workspace) + requestDetails = fabric.get_refresh_execution_details( + dataset=dataset, refresh_request_id=requestID, workspace=workspace + ) status = requestDetails.status # Check if the refresh has completed - if status == 'Completed': + if status == "Completed": break - elif status == 'Failed': - print(f"{red_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has failed.") + elif status == "Failed": + print( + f"{icons.red_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has failed." + ) return - elif status == 'Cancelled': - print(f"{yellow_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled.") + elif status == "Cancelled": + print( + f"{icons.yellow_dot} The refresh of the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled." 
+ ) return time.sleep(3) - print(f"{green_dot} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is complete.") + print( + f"{icons.green_dot} Refresh of the '{dataset}' semantic model within the '{workspace}' workspace is complete." + ) -@log -def cancel_dataset_refresh(dataset: str, request_id: Optional[str] = None, workspace: Optional[str] = None): +@log +def cancel_dataset_refresh( + dataset: str, request_id: Optional[str] = None, workspace: Optional[str] = None +): """ - Cancels the refresh of a semantic model which was executed via the [Enhanced Refresh API](https://learn.microsoft.com/power-bi/connect-data/asynchronous-refresh). + Cancels the refresh of a semantic model which was executed via the `Enhanced Refresh API `_ Parameters ---------- dataset : str Name of the semantic model. request_id : str, default=None - The request id of a semantic model refresh. + The request id of a semantic model refresh. Defaults to finding the latest active refresh of the semantic model. workspace : str, default=None The Fabric workspace name. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + """ - Returns - ------- - - """ + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) - - rr = fabric.list_refresh_requests(dataset = dataset, workspace = workspace) - rr_filt = rr[rr['Status'] == 'Unknown'] + rr = fabric.list_refresh_requests(dataset=dataset, workspace=workspace) + rr_filt = rr[rr["Status"] == "Unknown"] if request_id == None: if len(rr_filt) == 0: - print(f"{red_dot} There are no active Enhanced API refreshes of the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"{icons.red_dot} There are no active Enhanced API refreshes of the '{dataset}' semantic model within the '{workspace}' workspace." + ) return - request_id = rr_filt['Request Id'].iloc[0] - - dataset_id = resolve_dataset_id(dataset = dataset, workspace = workspace) + request_id = rr_filt["Request Id"].iloc[0] + + dataset_id = resolve_dataset_id(dataset=dataset, workspace=workspace) client = fabric.PowerBIRestClient() - response = client.delete(f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshes/{request_id}") + response = client.delete( + f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshes/{request_id}" + ) if response.status_code == 200: - print(f"{green_dot} The '{request_id}' refresh request for the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled.") + print( + f"{icons.green_dot} The '{request_id}' refresh request for the '{dataset}' semantic model within the '{workspace}' workspace has been cancelled." 
+ ) else: print(response.status_code) - diff --git a/src/sempy_labs/_tom.py b/src/sempy_labs/_tom.py new file mode 100644 index 00000000..c5a163ce --- /dev/null +++ b/src/sempy_labs/_tom.py @@ -0,0 +1,3766 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import re +from datetime import datetime +from sempy_labs._helper_functions import format_dax_object_name +from sempy_labs._list_functions import list_relationships +from sempy_labs._refresh_semantic_model import refresh_semantic_model +from sempy_labs.directlake._fallback import check_fallback_reason +from contextlib import contextmanager +from typing import List, Optional, Union, TYPE_CHECKING +from sempy._utils._log import log +import sempy_labs._icons as icons + +if TYPE_CHECKING: + import Microsoft.AnalysisServices.Tabular + import Microsoft.AnalysisServices.Tabular as TOM + + +checked = "\u2611" +unchecked = "\u2610" +start_bold = "\033[1m" +end_bold = "\033[0m" + + +class TOMWrapper: + """ + Convenience wrapper around the TOM object model for a semantic model. Always use connect_semantic_model function to make sure the TOM object is initialized correctly. + """ + + dataset: str + workspace: str + readonly: bool + tables_added: List[str] + + def __init__(self, dataset, workspace, readonly): + self.dataset = dataset + self.workspace = workspace + self.readonly = readonly + self.tables_added = [] + + self.tom_server = fabric.create_tom_server( + readonly=readonly, workspace=workspace + ) + self.model = self.tom_server.Databases.GetByName(dataset).Model + + def all_columns(self): + """ + Outputs a list of all columns within all tables in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Column] + All columns within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + for c in t.Columns: + if c.Type != TOM.ColumnType.RowNumber: + yield c + + def all_calculated_columns(self): + """ + Outputs a list of all calculated columns within all tables in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Column] + All calculated columns within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + for c in t.Columns: + if c.Type == TOM.ColumnType.Calculated: + yield c + + def all_calculated_tables(self): + """ + Outputs a list of all calculated tables in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Table] + All calculated tables within the semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + if any(p.SourceType == TOM.ColumnType.Calculated for p in t.Partitions): + yield t + + def all_calculation_groups(self): + """ + Outputs a list of all calculation groups in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Table] + All calculation groups within the semantic model. + """ + + for t in self.model.Tables: + if t.CalculationGroup is not None: + yield t + + def all_measures(self): + """ + Outputs a list of all measures in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Measure] + All measures within the semantic model. 
+ """ + + for t in self.model.Tables: + for m in t.Measures: + yield m + + def all_partitions(self): + """ + Outputs a list of all partitions in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Partition] + All partitions within the semantic model. + """ + + for t in self.model.Tables: + for p in t.Partitions: + yield p + + def all_hierarchies(self): + """ + Outputs a list of all hierarchies in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Hierarchy] + All hierarchies within the semantic model. + """ + + for t in self.model.Tables: + for h in t.Hierarchies: + yield h + + def all_levels(self): + """ + Outputs a list of all levels in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.Level] + All levels within the semantic model. + """ + + for t in self.model.Tables: + for h in t.Hierarchies: + for l in h.Levels: + yield l + + def all_calculation_items(self): + """ + Outputs a list of all calculation items in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.CalculationItem] + All calculation items within the semantic model. + """ + + for t in self.model.Tables: + if t.CalculationGroup is not None: + for ci in t.CalculationGroup.CalculationItems: + yield ci + + def all_rls(self): + """ + Outputs a list of all row level security expressions in the semantic model. + + Parameters + ---------- + + Returns + ------- + Iterator[Microsoft.AnalysisServices.Tabular.TablePermission] + All row level security expressions within the semantic model. + """ + + for r in self.model.Roles: + for tp in r.TablePermissions: + yield tp + + def add_measure( + self, + table_name: str, + measure_name: str, + expression: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + ): + """ + Adds a measure to the semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the measure will be created. + measure_name : str + Name of the measure. + expression : str + DAX expression of the measure. + format_string : str, default=None + Format string of the measure. + hidden : bool, default=False + Whether the measure will be hidden or visible. + description : str, default=None + A description of the measure. + display_folder : str, default=None + The display folder in which the measure will reside. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + obj = TOM.Measure() + obj.Name = measure_name + obj.Expression = expression + obj.IsHidden = hidden + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + + self.model.Tables[table_name].Measures.Add(obj) + + def add_calculated_table_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a calculated table column to a calculated table within a semantic model. 
+ + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + source_column : str + The source column for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. + description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.CalculatedTableColumn() + obj.Name = column_name + obj.SourceColumn = source_column + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsHidden = hidden + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_data_column( + self, + table_name: str, + column_name: str, + source_column: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a data column to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + source_column : str + The source column for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. + description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.DataColumn() + obj.Name = column_name + obj.SourceColumn = source_column + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsHidden = hidden + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_calculated_column( + self, + table_name: str, + column_name: str, + expression: str, + data_type: str, + format_string: Optional[str] = None, + hidden: Optional[bool] = False, + description: Optional[str] = None, + display_folder: Optional[str] = None, + data_category: Optional[str] = None, + key: Optional[bool] = False, + summarize_by: Optional[str] = None, + ): + """ + Adds a calculated column to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table in which the column will be created. + column_name : str + Name of the column. + expression : str + The DAX expression for the column. + data_type : str + The data type of the column. + format_string : str, default=None + Format string of the column. + hidden : bool, default=False + Whether the column will be hidden or visible. + description : str, default=None + A description of the column. + display_folder : str, default=None + The display folder in which the column will reside. + data_category : str, default=None + The data category of the column. + key : bool, default=False + Marks the column as the primary key of the table. + summarize_by : str, default=None + Sets the value for the Summarize By property of the column. + Defaults to None resolves to 'Default'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + data_type = ( + data_type.capitalize() + .replace("Integer", "Int64") + .replace("Datetime", "DateTime") + ) + if summarize_by is None: + summarize_by = "Default" + summarize_by = ( + summarize_by.capitalize() + .replace("Distinctcount", "DistinctCount") + .replace("Avg", "Average") + ) + + obj = TOM.CalculatedColumn() + obj.Name = column_name + obj.Expression = expression + obj.IsHidden = hidden + obj.DataType = System.Enum.Parse(TOM.DataType, data_type) + obj.IsKey = key + obj.SummarizeBy = System.Enum.Parse(TOM.AggregateFunction, summarize_by) + if format_string is not None: + obj.FormatString = format_string + if description is not None: + obj.Description = description + if display_folder is not None: + obj.DisplayFolder = display_folder + if data_category is not None: + obj.DataCategory = data_category + self.model.Tables[table_name].Columns.Add(obj) + + def add_calculation_item( + self, + table_name: str, + calculation_item_name: str, + expression: str, + ordinal: Optional[int] = None, + format_string_expression: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds a calculation item to a calculation group within a semantic model. 
+ + Parameters + ---------- + table_name : str + Name of the table in which the calculation item will be created. + calculation_item_name : str + Name of the calculation item. + expression : str + The DAX expression for the calculation item. + ordinal : int, default=None + The ordinal of the calculation item. + format_string_expression : str, default=None + The format string expression for the calculation item. + description : str, default=None + A description of the calculation item. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + obj = TOM.CalculationItem() + fsd = TOM.FormatStringDefinition() + obj.Name = calculation_item_name + obj.Expression = expression + if ordinal is not None: + obj.Ordinal = ordinal + if description is not None: + obj.Description = description + if format_string_expression is not None: + obj.FormatStringDefinition = fsd.Expression = format_string_expression + self.model.Tables[table_name].CalculationGroup.CalculationItems.Add(obj) + + def add_role( + self, + role_name: str, + model_permission: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds a role to a semantic model. + + Parameters + ---------- + role_name : str + Name of the role. + model_permission : str, default=None + The model permission for the role. + Defaults to None which resolves to 'Read'. + description : str, default=None + A description of the role. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if model_permission is None: + model_permission = "Read" + + obj = TOM.ModelRole() + obj.Name = role_name + obj.ModelPermission = System.Enum.Parse(TOM.ModelPermission, model_permission) + if description is not None: + obj.Description = description + self.model.Roles.Add(obj) + + def set_rls(self, role_name: str, table_name: str, filter_expression: str): + """ + Sets the row level security permissions for a table within a role. + + Parameters + ---------- + role_name : str + Name of the role. + table_name : str + Name of the table. + filter_expression : str + The DAX expression containing the row level security filter expression logic. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + tp = TOM.TablePermission() + tp.Table = self.model.Tables[table_name] + tp.FilterExpression = filter_expression + + try: + self.model.Roles[role_name].TablePermissions[ + table_name + ].FilterExpression = filter_expression + except: + self.model.Roles[role_name].TablePermissions.Add(tp) + + def set_ols( + self, role_name: str, table_name: str, column_name: str, permission: str + ): + """ + Sets the object level security permissions for a column within a role. + + Parameters + ---------- + role_name : str + Name of the role. + table_name : str + Name of the table. + column_name : str + Name of the column. + permission : str + The object level security permission for the column. + `Permission valid values `_ + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + permission = permission.capitalize() + + if permission not in ["Read", "None", "Default"]: + print(f"ERROR! 
Invalid 'permission' value.") + return + + cp = TOM.ColumnPermission() + cp.Column = self.model.Tables[table_name].Columns[column_name] + cp.MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + try: + self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions[ + column_name + ].MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) + except: + self.model.Roles[role_name].TablePermissions[ + table_name + ].ColumnPermissions.Add(cp) + + def add_hierarchy( + self, + table_name: str, + hierarchy_name: str, + columns: List[str], + levels: Optional[List[str]] = None, + hierarchy_description: Optional[str] = None, + hierarchy_hidden: Optional[bool] = False, + ): + """ + Adds a hierarchy to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + hierarchy_name : str + Name of the hierarchy. + columns : List[str] + Names of the columns to use within the hierarchy. + levels : List[str], default=None + Names of the levels to use within the hierarhcy (instead of the column names). + hierarchy_description : str, default=None + A description of the hierarchy. + hierarchy_hidden : bool, default=False + Whether the hierarchy is visible or hidden. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + if isinstance(columns, str): + print( + f"The 'levels' parameter must be a list. For example: ['Continent', 'Country', 'City']" + ) + return + if len(columns) == 1: + print(f"There must be at least 2 levels in order to create a hierarchy.") + return + + if levels is None: + levels = columns + + if len(columns) != len(levels): + print( + f"If specifying level names, you must specify a level for each column." + ) + return + + obj = TOM.Hierarchy() + obj.Name = hierarchy_name + obj.IsHidden = hierarchy_hidden + if hierarchy_description is not None: + obj.Description = hierarchy_description + self.model.Tables[table_name].Hierarchies.Add(obj) + + for col in columns: + lvl = TOM.Level() + lvl.Column = self.model.Tables[table_name].Columns[col] + lvl.Name = levels[columns.index(col)] + lvl.Ordinal = columns.index(col) + self.model.Tables[table_name].Hierarchies[hierarchy_name].Levels.Add(lvl) + + def add_relationship( + self, + from_table: str, + from_column: str, + to_table: str, + to_column: str, + from_cardinality: str, + to_cardinality: str, + cross_filtering_behavior: Optional[str] = None, + is_active: Optional[bool] = True, + security_filtering_behavior: Optional[str] = None, + rely_on_referential_integrity: Optional[bool] = False, + ): + """ + Adds a relationship to a semantic model. + + Parameters + ---------- + from_table : str + Name of the table on the 'from' side of the relationship. + from_column : str + Name of the column on the 'from' side of the relationship. + to_table : str + Name of the table on the 'to' side of the relationship. + to_column : str + Name of the column on the 'to' side of the relationship. + from_cardinality : str + The cardinality of the 'from' side of the relationship. Options: ['Many', 'One', 'None']. + to_cardinality : str + The cardinality of the 'to' side of the relationship. Options: ['Many', 'One', 'None']. + cross_filtering_behavior : str, default=None + Setting for the cross filtering behavior of the relationship. Options: ('Automatic', 'OneDirection', 'BothDirections'). + Defaults to None which resolves to 'Automatic'. + is_active : bool, default=True + Setting for whether the relationship is active or not. 
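A short sketch of `add_hierarchy` (reusing the Continent/Country/City example from its own validation message) together with `add_relationship`; the table and column names are placeholders and `connect_semantic_model` is assumed as above.
```python
# Sketch only: build a hierarchy and a many-to-one relationship.
from sempy_labs._tom import connect_semantic_model  # import path assumed

with connect_semantic_model(
    dataset="AdventureWorks", workspace="MyWorkspace", readonly=False
) as tom:
    tom.add_hierarchy(
        table_name="Geography",
        hierarchy_name="Geo Hierarchy",
        columns=["Continent", "Country", "City"],
    )
    tom.add_relationship(
        from_table="Sales",
        from_column="GeographyKey",
        to_table="Geography",
        to_column="GeographyKey",
        from_cardinality="Many",
        to_cardinality="One",
    )
```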
+ security_filtering_behavior : str, default=None + Setting for the security filtering behavior of the relationship. Options: ('None', 'OneDirection', 'BothDirections'). + Defaults to None which resolves to 'OneDirection'. + rely_on_referential_integrity : bool, default=False + Setting for the rely on referential integrity of the relationship. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if cross_filtering_behavior is None: + cross_filtering_behavior = "Automatic" + if security_filtering_behavior is None: + security_filtering_behavior = "OneDirection" + + from_cardinality = from_cardinality.capitalize() + to_cardinality = to_cardinality.capitalize() + cross_filtering_behavior = cross_filtering_behavior.capitalize() + security_filtering_behavior = security_filtering_behavior.capitalize() + security_filtering_behavior = security_filtering_behavior.replace( + "direct", "Direct" + ) + cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct") + + rel = TOM.SingleColumnRelationship() + rel.FromColumn = self.model.Tables[from_table].Columns[from_column] + rel.FromCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, from_cardinality + ) + rel.ToColumn = self.model.Tables[to_table].Columns[to_column] + rel.ToCardinality = System.Enum.Parse( + TOM.RelationshipEndCardinality, to_cardinality + ) + rel.IsActive = is_active + rel.CrossFilteringBehavior = System.Enum.Parse( + TOM.CrossFilteringBehavior, cross_filtering_behavior + ) + rel.SecurityFilteringBehavior = System.Enum.Parse( + TOM.SecurityFilteringBehavior, security_filtering_behavior + ) + rel.RelyOnReferentialIntegrity = rely_on_referential_integrity + + self.model.Relationships.Add(rel) + + def add_calculation_group( + self, + name: str, + precedence: int, + description: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a calculation group to a semantic model. + + Parameters + ---------- + name : str + Name of the calculation group. + precedence : int + The precedence of the calculation group. + description : str, default=None + A description of the calculation group. + hidden : bool, default=False + Whether the calculation group is hidden/visible. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + tbl = TOM.Table() + tbl.Name = name + tbl.CalculationGroup = TOM.CalculationGroup() + tbl.CalculationGroup.Precedence = precedence + tbl.IsHidden = hidden + if description is not None: + tbl.Description = description + + part = TOM.Partition() + part.Name = name + part.Source = TOM.CalculationGroupSource() + tbl.Partitions.Add(part) + + sortCol = "Ordinal" + + col1 = TOM.DataColumn() + col1.Name = sortCol + col1.SourceColumn = sortCol + col1.IsHidden = True + col1.DataType = System.Enum.Parse(TOM.DataType, "Int64") + + tbl.Columns.Add(col1) + + col2 = TOM.DataColumn() + col2.Name = "Name" + col2.SourceColumn = "Name" + col2.DataType = System.Enum.Parse(TOM.DataType, "String") + # col.SortByColumn = m.Tables[name].Columns[sortCol] + tbl.Columns.Add(col2) + + self.model.DiscourageImplicitMeasures = True + self.model.Tables.Add(tbl) + + def add_expression( + self, name: str, expression: str, description: Optional[str] = None + ): + """ + Adds an expression to a semantic model. + + Parameters + ---------- + name : str + Name of the expression. + expression: str + The M expression of the expression. + description : str, default=None + A description of the expression. 
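A hedged sketch combining `add_calculation_group` with `add_calculation_item` from earlier in this file; the group name, ordinal and the DAX expression (including the 'Date'[Date] column) are illustrative assumptions.
```python
# Sketch only: create a calculation group and add a YTD item to it.
from sempy_labs._tom import connect_semantic_model  # import path assumed

with connect_semantic_model(
    dataset="AdventureWorks", workspace="MyWorkspace", readonly=False
) as tom:
    tom.add_calculation_group(name="Time Intelligence", precedence=1)
    tom.add_calculation_item(
        table_name="Time Intelligence",
        calculation_item_name="YTD",
        expression="CALCULATE(SELECTEDMEASURE(), DATESYTD('Date'[Date]))",
        ordinal=0,
    )
```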
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + exp = TOM.NamedExpression() + exp.Name = name + if description is not None: + exp.Description = description + exp.Kind = TOM.ExpressionKind.M + exp.Expression = expression + + self.model.Expressions.Add(exp) + + def add_translation(self, language: str): + """ + Adds a translation language (culture) to a semantic model. + + Parameters + ---------- + language : str + The language code (i.e. 'it-IT' for Italian). + """ + import Microsoft.AnalysisServices.Tabular as TOM + + cul = TOM.Culture() + cul.Name = language + + try: + self.model.Cultures.Add(cul) + except: + pass + + def add_perspective(self, perspective_name: str): + """ + Adds a perspective to a semantic model. + + Parameters + ---------- + perspective_name : str + Name of the perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + persp = TOM.Perspective() + persp.Name = perspective_name + self.model.Perspectives.Add(persp) + + def add_m_partition( + self, + table_name: str, + partition_name: str, + expression: str, + mode: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds an M-partition to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + partition_name : str + Name of the partition. + expression : str + The M expression encapsulating the logic for the partition. + mode : str, default=None + The query mode for the partition. + Defaults to None which resolves to 'Import'. + description : str, default=None + A description for the partition. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + mode = ( + mode.title() + .replace("query", "Query") + .replace(" ", "") + .replace("lake", "Lake") + ) + + mp = TOM.MPartitionSource() + mp.Expression = expression + p = TOM.Partition() + p.Name = partition_name + p.Source = mp + if description is not None: + p.Description = description + if mode is None: + mode = "Default" + p.Mode = System.Enum.Parse(TOM.ModeType, mode) + + self.model.Tables[table_name].Partitions.Add(p) + + def add_entity_partition( + self, + table_name: str, + entity_name: str, + expression: Optional[str] = None, + description: Optional[str] = None, + ): + """ + Adds an entity partition to a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + entity_name : str + Name of the lakehouse table. + expression : TOM Object, default=None + The expression used by the table. + Defaults to None which resolves to the 'DatabaseQuery' expression. + description : str, default=None + A description for the partition. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + ep = TOM.EntityPartitionSource() + ep.Name = table_name + ep.EntityName = entity_name + if expression is None: + ep.ExpressionSource = self.model.Expressions["DatabaseQuery"] + else: + ep.ExpressionSource = expression + p = TOM.Partition() + p.Name = table_name + p.Source = ep + p.Mode = TOM.ModeType.DirectLake + if description is not None: + p.Description = description + + self.model.Tables[table_name].Partitions.Add(p) + + def set_alternate_of( + self, + table_name: str, + column_name: str, + summarization_type: str, + base_table: str, + base_column: Optional[str] = None, + ): + """ + Sets the 'alternate of' property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + summarization_type : str + The summarization type for the column. 
+ `Summarization valid values `_ + base_table : str + Name of the base table for aggregation. + base_column : str + Name of the base column for aggregation + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if base_column is not None and base_table is None: + print( + f"ERROR: If you specify the base table you must also specify the base column" + ) + + summarization_type = ( + summarization_type.replace(" ", "") + .capitalize() + .replace("Groupby", "GroupBy") + ) + + summarizationTypes = ["Sum", "GroupBy", "Count", "Min", "Max"] + if summarization_type not in summarizationTypes: + print( + f"The 'summarization_type' parameter must be one of the following valuse: {summarizationTypes}." + ) + return + + ao = TOM.AlternateOf() + ao.Summarization = System.Enum.Parse(TOM.SummarizationType, summarization_type) + if base_column is not None: + ao.BaseColumn = self.model.Tables[base_table].Columns[base_column] + else: + ao.BaseTable = self.model.Tables[base_table] + + self.model.Tables[table_name].Columns[column_name].AlternateOf = ao + + # Hide agg table and columns + t = self.model.Tables[table_name] + t.IsHidden = True + for c in t.Columns: + c.IsHidden = True + + def remove_alternate_of(self, table_name: str, column_name: str): + """ + Removes the 'alternate of' property on a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + + Returns + ------- + + """ + + self.model.Tables[table_name].Columns[column_name].AlternateOf = None + + def get_annotations( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.Annotation": + """ + Shows all annotations for a given object within a semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.Annotation + TOM objects of all the annotations on a particular object within the semantic model. + """ + + # df = pd.DataFrame(columns=['Name', 'Value']) + + for a in object.Annotations: + # new_data = {'Name': a.Name, 'Value': a.Value} + yield a + # df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + def set_annotation(self, object, name: str, value: str): + """ + Sets an annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + value : str + Value of the annotation. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + ann = TOM.Annotation() + ann.Name = name + ann.Value = value + + try: + object.Annotations[name].Value = value + except: + object.Annotations.Add(ann) + + def get_annotation_value(self, object, name: str): + """ + Obtains the annotation value for a given annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + + Returns + ------- + str + The annotation value. + """ + + return object.Annotations[name].Value + + def remove_annotation(self, object, name: str): + """ + Removes an annotation on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. 
+ """ + + object.Annotations.Remove(name) + + def clear_annotations(self, object): + """ + Removes all annotations on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + """ + + object.Annotations.Clear() + + def get_extended_properties( + self, object + ) -> "Microsoft.AnalysisServices.Tabular.ExtendedProperty": + """ + Retrieves all extended properties on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.ExtendedPropertiesCollection + TOM Objects of all the extended properties. + """ + + for a in object.ExtendedProperties: + yield a + + def set_extended_property( + self, object, extended_property_type: str, name: str, value: str + ): + """ + Sets an extended property on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + extended_property_type : str + The extended property type. + `Extended property valid values `_ + name : str + Name of the extended property. + value : str + Value of the extended property. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + extended_property_type = extended_property_type.title() + + if extended_property_type == "Json": + ep = TOM.JsonExtendedProperty() + else: + ep = TOM.StringExtendedProperty() + + ep.Name = name + ep.Value = value + + try: + object.ExtendedProperties[name].Value = value + except: + object.ExtendedProperties.Add(ep) + + def get_extended_property_value(self, object, name: str): + """ + Retrieves the value of an extended property for an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + + Returns + ------- + str + The extended property value. + """ + + return object.ExtendedProperties[name].Value + + def remove_extended_property(self, object, name: str): + """ + Removes an extended property on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + name : str + Name of the annotation. + """ + + object.ExtendedProperties.Remove(name) + + def clear_extended_properties(self, object): + """ + Removes all extended properties on an object within the semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + """ + + object.ExtendedProperties.Clear() + + def in_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Indicates whether an object is contained within a given perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspecitve_name : str + Name of the perspective. + + Returns + ------- + bool + An indication as to whether the object is contained within the given perspective. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." + ) + return + + object.Model.Perspectives[perspective_name] + + try: + if objectType == TOM.ObjectType.Table: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] + elif objectType == TOM.ObjectType.Column: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns[object.Name] + elif objectType == TOM.ObjectType.Measure: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures[object.Name] + elif objectType == TOM.ObjectType.Hierarchy: + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies[object.Name] + return True + except: + return False + + def add_to_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Adds an object to a perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspective_name : str + Name of the perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." + ) + return + try: + object.Model.Perspectives[perspective_name] + except: + print(f"The '{perspective_name}' perspective does not exist.") + return + + # try: + if objectType == TOM.ObjectType.Table: + pt = TOM.PerspectiveTable() + pt.Table = object + object.Model.Perspectives[perspective_name].PerspectiveTables.Add(pt) + elif objectType == TOM.ObjectType.Column: + pc = TOM.PerspectiveColumn() + pc.Column = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Add(pc) + elif objectType == TOM.ObjectType.Measure: + pm = TOM.PerspectiveMeasure() + pm.Measure = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Add(pm) + elif objectType == TOM.ObjectType.Hierarchy: + ph = TOM.PerspectiveHierarchy() + ph.Hierarchy = object + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Add(ph) + # except: + # pass + + def remove_from_perspective( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + perspective_name: str, + ): + """ + Removes an object from a perspective. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + perspective_name : str + Name of the perspective. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] + objectType = object.ObjectType + + if objectType not in validObjects: + print( + f"Only the following object types are valid for perspectives: {validObjects}." 
+ ) + return + try: + object.Model.Perspectives[perspective_name] + except: + print(f"The '{perspective_name}' perspective does not exist.") + return + + # try: + if objectType == TOM.ObjectType.Table: + pt = object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Name + ] + object.Model.Perspectives[perspective_name].PerspectiveTables.Remove(pt) + elif objectType == TOM.ObjectType.Column: + pc = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveColumns[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveColumns.Remove(pc) + elif objectType == TOM.ObjectType.Measure: + pm = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveMeasures[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveMeasures.Remove(pm) + elif objectType == TOM.ObjectType.Hierarchy: + ph = ( + object.Model.Perspectives[perspective_name] + .PerspectiveTables[object.Parent.Name] + .PerspectiveHierarchies[object.Name] + ) + object.Model.Perspectives[perspective_name].PerspectiveTables[ + object.Parent.Name + ].PerspectiveHierarchies.Remove(ph) + # except: + # pass + + def set_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + property: str, + value: str, + ): + """ + Sets a translation value for an object's property. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + language : str + The language code. + property : str + The property to set. Options: 'Name', 'Description', 'Display Folder'. + value : str + The transation value. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + self.add_translation(language=language) + + property = property.title() + + validObjects = [ + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.Measure, + TOM.ObjectType.Hierarchy, + ] # , 'Level' + + if object.ObjectType not in validObjects: + print(f"Translations can only be set to {validObjects}.") + return + + mapping = { + "Name": TOM.TranslatedProperty.Caption, + "Description": TOM.TranslatedProperty.Description, + "Display Folder": TOM.TranslatedProperty.DisplayFolder, + } + + prop = mapping.get(property) + + try: + object.Model.Cultures[language] + except: + print( + f"The '{language}' translation language does not exist in the semantic model." + ) + return + + object.Model.Cultures[language].ObjectTranslations.SetTranslation( + object, prop, value + ) + + def remove_translation( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], + language: str, + ): + """ + Removes an object's translation value. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. + language : str + The language code. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + o = object.Model.Cultures[language].ObjectTranslations[ + object, TOM.TranslatedProperty.Caption + ] + object.Model.Cultures[language].ObjectTranslations.Remove(o) + + def remove_object(self, object): + """ + Removes an object from a semantic model. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column/measure) within a semantic model. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + # Have to remove translations and perspectives on the object before removing it. + if objType in ["Table", "Column", "Measure", "Hierarchy", "Level"]: + for lang in object.Model.Cultures: + try: + self.remove_translation(object=object, language=lang.Name) + except: + pass + if objType in ["Table", "Column", "Measure", "Hierarchy"]: + for persp in object.Model.Perspectives: + try: + self.remove_from_perspective( + object=object, perspective_name=persp.Name + ) + except: + pass + + if objType == TOM.ObjectType.Column: + object.Parent.Columns.Remove(object.Name) + elif objType == TOM.ObjectType.Measure: + object.Parent.Measures.Remove(object.Name) + elif objType == TOM.ObjectType.Hierarchy: + object.Parent.Hierarchies.Remove(object.Name) + elif objType == TOM.ObjectType.Level: + object.Parent.Levels.Remove(object.Name) + elif objType == TOM.ObjectType.Partition: + object.Parent.Partitions.Remove(object.Name) + elif objType == TOM.ObjectType.Expression: + object.Parent.Expressions.Remove(object.Name) + elif objType == TOM.ObjectType.DataSource: + object.Parent.DataSources.Remove(object.Name) + elif objType == TOM.ObjectType.Role: + object.Parent.Roles.Remove(object.Name) + elif objType == TOM.ObjectType.Relationship: + object.Parent.Relationships.Remove(object.Name) + elif objType == TOM.ObjectType.Culture: + object.Parent.Cultures.Remove(object.Name) + elif objType == TOM.ObjectType.Perspective: + object.Parent.Perspectives.Remove(object.Name) + elif objType == TOM.ObjectType.CalculationItem: + object.Parent.CalculationItems.Remove(object.Name) + elif objType == TOM.ObjectType.TablePermission: + object.Parent.TablePermissions.Remove(object.Name) + + def used_in_relationships(self, object: Union["TOM.Table", "TOM.Column"]): + """ + Shows all relationships in which a table/column is used. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.RelationshipCollection + All relationships in which the table/column is used. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Table: + for r in self.model.Relationships: + if r.FromTable.Name == object.Name or r.ToTable.Name == object.Name: + yield r # , 'Table' + elif objType == TOM.ObjectType.Column: + for r in self.model.Relationships: + if ( + r.FromTable.Name == object.Parent.Name + and r.FromColumn.Name == object.Name + ) or ( + r.ToTable.Name == object.Parent.Name + and r.ToColumn.Name == object.Name + ): + yield r # , 'Column' + + def used_in_levels(self, column: "TOM.Column"): + """ + Shows all levels in which a column is used. + + Parameters + ---------- + object : TOM Object + An column object within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.LevelCollection + All levels in which the column is used. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + for l in self.all_levels(): + if ( + l.Parent.Table.Name == column.Parent.Name + and l.Column.Name == column.Name + ): + yield l + + def used_in_hierarchies(self, column: "TOM.Column"): + """ + Shows all hierarchies in which a column is used. + + Parameters + ---------- + object : TOM Object + An column object within a semantic model. 
+ + Returns + ------- + Microsoft.AnalysisServices.Tabular.HierarchyCollection + All hierarchies in which the column is used. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + for l in self.all_levels(): + if ( + l.Parent.Table.Name == column.Parent.Name + and l.Column.Name == column.Name + ): + yield l.Parent + + def used_in_sort_by(self, column: "TOM.Column"): + """ + Shows all columns in which a column is used for sorting. + + Parameters + ---------- + object : TOM Object + An column object within a semantic model. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.ColumnCollection + All columns in which the column is used for sorting. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + for c in self.model.Tables[column.Parent.Name].Columns: + if c.SortByColumn == column: + yield c + + def used_in_rls( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the filter expressions which reference a given object. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + df_filt = dependencies[dependencies["Object Type"] == "Rows Allowed"] + + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m + + def used_in_data_coverage_definition( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the ... which reference a given object. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. 
+ + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + df_filt = dependencies[ + dependencies["Object Type"] == "Data Coverage Definition" + ] + + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m + + def used_in_calc_item( + self, + object: Union["TOM.Table", "TOM.Column", "TOM.Measure"], + dependencies: pd.DataFrame, + ): + """ + Identifies the ... which reference a given object. + + Parameters + ---------- + object : TOM Object + An object (i.e. table/column) within a semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + df_filt = dependencies[dependencies["Object Type"] == "Calculation Item"] + + if objType == TOM.ObjectType.Table: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Table") + & (df_filt["Referenced Table"] == object.Name) + ] + tbls = fil["Table Name"].unique().tolist() + for t in self.model.Tables: + if t.Name in tbls: + yield t + elif objType == TOM.ObjectType.Column: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Column") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + cols = fil["Full Object Name"].unique().tolist() + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + elif objType == TOM.ObjectType.Measure: + fil = df_filt[ + (df_filt["Referenced Object Type"] == "Measure") + & (df_filt["Referenced Table"] == object.Parent.Name) + & (df_filt["Referenced Object"] == object.Name) + ] + meas = fil["Object Name"].unique().tolist() + for m in self.all_measures(): + if m.Name in meas: + yield m + + def hybrid_tables(self): + """ + Outputs the hybrid tables within a semantic model. + + Parameters + ---------- + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection + All hybrid tables within a semantic model. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + if any(p.Mode == TOM.ModeType.Import for p in t.Partitions): + if any(p.Mode == TOM.ModeType.DirectQuery for p in t.Partitions): + yield t + + def date_tables(self): + """ + Outputs the tables which are marked as date tables within a semantic model. + + Parameters + ---------- + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection + All tables marked as date tables within a semantic model. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for t in self.model.Tables: + if t.DataCategory == "Time": + if any( + c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns + ): + yield t + + def is_hybrid_table(self, table_name: str): + """ + Identifies if a table is a hybrid table. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table is a hybrid table. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + isHybridTable = False + + if any( + p.Mode == TOM.ModeType.Import + for p in self.model.Tables[table_name].Partitions + ): + if any( + p.Mode == TOM.ModeType.DirectQuery + for p in self.model.Tables[table_name].Partitions + ): + isHybridTable = True + + return isHybridTable + + def is_date_table(self, table_name: str): + """ + Identifies if a table is marked as a date table. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table is marked as a date table. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + isDateTable = False + t = self.model.Tables[table_name] + + if t.DataCategory == "Time": + if any(c.IsKey and c.DataType == TOM.DataType.DateTime for c in t.Columns): + isDateTable = True + + return isDateTable + + def mark_as_date_table(self, table_name: str, column_name: str): + """ + Marks a table as a date table. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the date column in the table. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + t = self.model.Tables[table_name] + c = t.Columns[column_name] + if c.DataType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} The column specified in the 'column_name' parameter in this function must be of DateTime data type." + ) + return + + daxQuery = f""" + define measure '{table_name}'[test] = + var mn = MIN('{table_name}'[{column_name}]) + var ma = MAX('{table_name}'[{column_name}]) + var x = COUNTROWS(DISTINCT('{table_name}'[{column_name}])) + var y = DATEDIFF(mn, ma, DAY) + 1 + return if(y = x, 1,0) + + EVALUATE + SUMMARIZECOLUMNS( + "1",[test] + ) + """ + df = fabric.evaluate_dax( + dataset=self.dataset, workspace=self.workspace, dax_string=daxQuery + ) + value = df["1"].iloc[0] + if value != "1": + print( + f"{icons.red_dot} The '{column_name}' within the '{table_name}' table does not contain contiguous date values." + ) + return + + # Mark as a date table + t.DataCategory = "Time" + c.Columns[column_name].IsKey = True + print( + f"{icons.green_dot} The '{table_name}' table has been marked as a date table using the '{column_name}' column as its primary date key." + ) + + def has_aggs(self): + """ + Identifies if a semantic model has any aggregations. + + Parameters + ---------- + + Returns + ------- + bool + Indicates if the semantic model has any aggregations. 
+ """ + + hasAggs = False + + for c in self.all_columns(): + if c.AlterateOf is not None: + hasAggs = True + + return hasAggs + + def is_agg_table(self, table_name: str): + """ + Identifies if a table has aggregations. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table has any aggregations. + """ + + t = self.model.Tables[table_name] + + return any(c.AlternateOf is not None for c in t.Columns) + + def has_hybrid_table(self): + """ + Identifies if a semantic model has a hybrid table. + + Parameters + ---------- + + Returns + ------- + bool + Indicates if the semantic model has a hybrid table. + """ + + hasHybridTable = False + + for t in self.model.Tables: + if self.is_hybrid_table(table_name=t.Name): + hasHybridTable = True + + return hasHybridTable + + def has_date_table(self): + """ + Identifies if a semantic model has a table marked as a date table. + + Parameters + ---------- + + Returns + ------- + bool + Indicates if the semantic model has a table marked as a date table. + """ + + hasDateTable = False + + for t in self.model.Tables: + if self.is_date_table(table_name=t.Name): + hasDateTable = True + + return hasDateTable + + def is_direct_lake(self): + """ + Identifies if a semantic model is in Direct Lake mode. + + Parameters + ---------- + + Returns + ------- + bool + Indicates if the semantic model is in Direct Lake mode. + """ + + return any( + p.Mode == TOM.ModeType.DirectLake + for t in self.model.Tables + for p in t.Partitions + ) + + def is_field_parameter(self, table_name: str): + """ + Identifies if a table is a field parameter. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table is a field parameter. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + t = self.model.Tables[table_name] + + return ( + any( + p.SourceType == TOM.PartitionSourceType.Calculated + and "NAMEOF(" in p.Source.Expression + for p in t.Partitions + ) + and all( + "[Value" in c.SourceColumn + for c in t.Columns + if c.Type != TOM.ColumnType.RowNumber + ) + and t.Columns.Count == 4 + ) + + def is_auto_date_table(self, table_name: str): + """ + Identifies if a table is an auto-date table. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + Indicates if the table is an auto-date table. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + isAutoDate = False + + t = self.model.Tables[table_name] + + if t.Name.startswith("LocalDateTable_") or t.Name.startswith( + "DateTableTemplate_" + ): + if any( + p.SourceType == TOM.PartitionSourceType.Calculated for p in t.Partitions + ): + isAutoDate = True + + return isAutoDate + + def set_kpi( + self, + measure_name: str, + target: Union[int, float, str], + lower_bound: float, + upper_bound: float, + lower_mid_bound: Optional[float] = None, + upper_mid_bound: Optional[float] = None, + status_type: Optional[str] = None, + status_graphic: Optional[str] = None, + ): + """ + Sets the properties to add/update a KPI for a measure. + + Parameters + ---------- + measure_name : str + Name of the measure. + target : str, int, float + The target for the KPI. This can either be a number or the name of a different measure in the semantic model. + lower_bound: float + The lower bound for the KPI. + upper_bound : float + The upper bound for the KPI. + lower_mid_bound : float, default=None + The lower-mid bound for the KPI. 
Set this if status_type is 'Centered' or 'CenteredReversed'. + upper_mid_bound : float, default=None + The upper-mid bound for the KPI. Set this if status_type is 'Centered' or 'CenteredReversed'. + status_type : str, default=None + The status type of the KPI. Options: 'Linear', 'LinearReversed', 'Centered', 'CenteredReversed'. + Defaults to None which resolvs to 'Linear'. + status_graphic : str, default=None + The status graphic for the KPI. + Defaults to 'Three Circles Colored'. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + # https://github.com/m-kovalsky/Tabular/blob/master/KPI%20Graphics.md + + if measure_name == target: + print( + f"The 'target' parameter cannot be the same measure as the 'measure_name' parameter." + ) + return + + if status_graphic is None: + status_graphic = "Three Circles Colored" + + statusType = ["Linear", "LinearReversed", "Centered", "CenteredReversed"] + status_type = status_type.title().replace(" ", "") + + if status_type is None: + status_type = "Linear" + + if status_type not in statusType: + print( + f"'{status_type}' is an invalid status_type. Please choose from these options: {statusType}." + ) + return + + if status_type in ["Linear", "LinearReversed"]: + if upper_bound is not None or lower_mid_bound is not None: + print( + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are not used in the 'Linear' and 'LinearReversed' status types. Make sure these parameters are set to None." + ) + return + elif upper_bound <= lower_bound: + print(f"The upper_bound must be greater than the lower_bound.") + return + + if status_type in ["Centered", "CenteredReversed"]: + if upper_mid_bound is None or lower_mid_bound is None: + print( + f"The 'upper_mid_bound' and 'lower_mid_bound' parameters are necessary in the 'Centered' and 'CenteredReversed' status types." + ) + return + elif upper_bound <= upper_mid_bound: + print(f"The upper_bound must be greater than the upper_mid_bound.") + elif upper_mid_bound <= lower_mid_bound: + print(f"The upper_mid_bound must be greater than the lower_mid_bound.") + elif lower_mid_bound <= lower_bound: + print(f"The lower_mid_bound must be greater than the lower_bound.") + + try: + table_name = next( + m.Parent.Name for m in self.all_measures() if m.Name == measure_name + ) + except: + print( + f"The '{measure_name}' measure does not exist in the '{self.dataset}' semantic model within the '{self.workspace}'." + ) + return + + graphics = [ + "Cylinder", + "Five Bars Colored", + "Five Boxes Colored", + "Gauge - Ascending", + "Gauge - Descending", + "Road Signs", + "Shapes", + "Standard Arrow", + "Three Circles Colored", + "Three Flags Colored", + "Three Stars Colored", + "Three Symbols Uncircled Colored", + "Traffic Light", + "Traffic Light - Single", + "Variance Arrow", + "Status Arrow - Ascending", + "Status Arrow - Descending", + ] + + if status_graphic not in graphics: + print( + f"The '{status_graphic}' status graphic is not valid. Please choose from these options: {graphics}." + ) + return + + measure_target = True + + try: + float(target) + tgt = str(target) + measure_target = False + except: + try: + tgt = next( + format_dax_object_name(m.Parent.Name, m.Name) + for m in self.all_measures() + if m.Name == target + ) + except: + print( + f"The '{target}' measure does not exist in the '{self.dataset}' semantic model within the '{self.workspace}'." 
+                )
+                return
+
+        if measure_target:
+            expr = f"var x = [{measure_name}]/[{target}]\nreturn"
+        else:
+            expr = f"var x = [{measure_name}]\nreturn"
+
+        if status_type == "Linear":
+            expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_bound},-1,\n\t\tif(x<{upper_bound},0,1)))"
+        elif status_type == "LinearReversed":
+            expr = f"{expr}\nif(isblank(x),blank(),\nif(x<{lower_bound},1,\n\t\tif(x<{upper_bound},0,-1)))"
+        elif status_type == "Centered":
+            expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},-1,0),\n\t\t\tif(x<{upper_mid_bound},1,\n\t\t\t\tif(x<{upper_bound},0,-1))))"
+        elif status_type == "CenteredReversed":
+            expr = f"{expr}\nif(isblank(x),blank(),\n\tif(x<{lower_mid_bound},\n\t\tif(x<{lower_bound},1,0),\n\t\t\tif(x<{upper_mid_bound},-1,\n\t\t\t\tif(x<{upper_bound},0,1))))"
+
+        kpi = TOM.KPI()
+        kpi.TargetExpression = tgt
+        kpi.StatusGraphic = status_graphic
+        kpi.StatusExpression = expr
+
+        ms = self.model.Tables[table_name].Measures[measure_name]
+        try:
+            ms.KPI.TargetExpression = tgt
+            ms.KPI.StatusGraphic = status_graphic
+            ms.KPI.StatusExpression = expr
+        except:
+            ms.KPI = kpi
+
+    def set_aggregations(self, table_name: str, agg_table_name: str):
+        """
+        Sets the aggregations (alternate of) for all the columns in an aggregation table based on a base table.
+
+        Parameters
+        ----------
+        table_name : str
+            Name of the base table.
+        agg_table_name : str
+            Name of the aggregation table.
+
+        Returns
+        -------
+
+        """
+        # TOM is imported locally, as in the other methods, so the DataType enum below resolves.
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        for c in self.model.Tables[agg_table_name].Columns:
+
+            dataType = c.DataType
+
+            if dataType in [
+                TOM.DataType.String,
+                TOM.DataType.Boolean,
+                TOM.DataType.DateTime,
+            ]:
+                sumType = "GroupBy"
+            else:
+                sumType = "Sum"
+
+            self.set_alternate_of(
+                table_name=agg_table_name,
+                column_name=c.Name,
+                base_table=table_name,
+                base_column=c.Name,
+                summarization_type=sumType,
+            )
+
+    def set_is_available_in_mdx(
+        self, table_name: str, column_name: str, value: Optional[bool] = False
+    ):
+        """
+        Sets the IsAvailableInMdx property on a column.
+
+        Parameters
+        ----------
+        table_name : str
+            Name of the table.
+        column_name : str
+            Name of the column.
+        value : bool, default=False
+            The IsAvailableInMdx property value.
+        """
+
+        self.model.Tables[table_name].Columns[column_name].IsAvailableInMdx = value
+
+    def set_summarize_by(
+        self, table_name: str, column_name: str, value: Optional[str] = None
+    ):
+        """
+        Sets the SummarizeBy property on a column.
+
+        Parameters
+        ----------
+        table_name : str
+            Name of the table.
+        column_name : str
+            Name of the column.
+        value : str, default=None
+            The SummarizeBy property value.
+            Defaults to None which resolves to 'Default'.
+            `Aggregate valid values <https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby>`_
+        """
+        import Microsoft.AnalysisServices.Tabular as TOM
+        import System
+
+        values = [
+            "Default",
+            "None",
+            "Sum",
+            "Min",
+            "Max",
+            "Count",
+            "Average",
+            "DistinctCount",
+        ]
+        # https://learn.microsoft.com/en-us/dotnet/api/microsoft.analysisservices.tabular.column.summarizeby?view=analysisservices-dotnet#microsoft-analysisservices-tabular-column-summarizeby
+
+        if value is None:
+            value = "Default"
+        value = (
+            value.capitalize()
+            .replace("Distinctcount", "DistinctCount")
+            .replace("Avg", "Average")
+        )
+
+        if value not in values:
+            print(
+                f"'{value}' is not a valid value for the SummarizeBy property. These are the valid values: {values}."
+ ) + return + + self.model.Tables[table_name].Columns[column_name].SummarizeBy = ( + System.Enum.Parse(TOM.AggregateFunction, value) + ) + + def set_direct_lake_behavior(self, direct_lake_behavior: str): + """ + Sets the Direct Lake Behavior property for a semantic model. + + Parameters + ---------- + direct_lake_behavior : str + The DirectLakeBehavior property value. + `DirectLakeBehavior valid values `_ + """ + import System + + direct_lake_behavior = direct_lake_behavior.capitalize() + if direct_lake_behavior.startswith("Auto"): + direct_lake_behavior = "Automatic" + elif direct_lake_behavior.startswith("Directl") or direct_lake_behavior == "Dl": + direct_lake_behavior = "DirectLakeOnly" + elif direct_lake_behavior.startswith("Directq") or direct_lake_behavior == "Dq": + direct_lake_behavior = "DirectQueryOnly" + + dlValues = ["Automatic", "DirectLakeOnly", "DirectQueryOnly"] + + if direct_lake_behavior not in dlValues: + print( + f"The 'direct_lake_behavior' parameter must be one of these values: {dlValues}." + ) + return + + self.model.DirectLakeBehavior = System.Enum.Parse( + TOM.DirectLakeBehavior, direct_lake_behavior + ) + + def add_table( + self, + name: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a table to the semantic model. + + Parameters + ---------- + name : str + Name of the table. + description : str, default=None + A description of the table. + data_catgegory : str, default=None + The data category for the table. + hidden : bool, default=False + Whether the table is hidden or visible. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + t = TOM.Table() + t.Name = name + if description is not None: + t.Description = description + if data_category is not None: + t.DataCategory = data_category + t.Hidden = hidden + self.model.Tables.Add(t) + + def add_calculated_table( + self, + name: str, + expression: str, + description: Optional[str] = None, + data_category: Optional[str] = None, + hidden: Optional[bool] = False, + ): + """ + Adds a calculated table to the semantic model. + + Parameters + ---------- + name : str + Name of the table. + expression : str + The DAX expression for the calculated table. + description : str, default=None + A description of the table. + data_catgegory : str, default=None + The data category for the table. + hidden : bool, default=False + Whether the table is hidden or visible. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + par = TOM.Partition() + par.Name = name + + parSource = TOM.CalculatedPartitionSource() + parSource.Expression = expression + par.Source = parSource + + t = TOM.Table() + t.Name = name + if description is not None: + t.Description = description + if data_category is not None: + t.DataCategory = data_category + t.Hidden = hidden + t.Partitions.Add(par) + self.model.Tables.Add(t) + + def add_field_parameter(self, table_name: str, objects: List[str]): + """ + Adds a table to the semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + objects : List[str] + The columns/measures to be included in the field parameter. + Columns must be specified as such : 'Table Name'[Column Name]. + Measures may be formatted as '[Measure Name]' or 'Measure Name'. + """ + + if isinstance(objects, str): + print(f"The 'objects' parameter must be a list of columns/measures.") + return + if len(objects) == 1: + print( + f"There must be more than one object (column/measure) within the objects parameter." 
+            )
+            return
+
+        expr = ""
+        i = 0
+        for obj in objects:
+            success = False
+            for m in self.all_measures():
+                if obj == "[" + m.Name + "]" or obj == m.Name:
+                    expr = (
+                        expr
+                        + "\n\t"
+                        + '("'
+                        + m.Name
+                        + '", NAMEOF(['
+                        + m.Name
+                        + "]), "
+                        + str(i)
+                        + "),"
+                    )
+                    success = True
+            for c in self.all_columns():
+                fullObjName = format_dax_object_name(c.Parent.Name, c.Name)
+                if obj == fullObjName or obj == c.Parent.Name + "[" + c.Name + "]":
+                    expr = (
+                        expr
+                        + "\n\t"
+                        + '("'
+                        + c.Name
+                        + '", NAMEOF('
+                        + fullObjName
+                        + "), "
+                        + str(i)
+                        + "),"
+                    )
+                    success = True
+            if not success:
+                print(
+                    f"The '{obj}' object was not found in the '{self.dataset}' semantic model."
+                )
+                return
+            else:
+                i += 1
+
+        expr = "{" + expr.rstrip(",") + "\n}"
+
+        self.add_calculated_table(name=table_name, expression=expr)
+
+        col2 = table_name + " Fields"
+        col3 = table_name + " Order"
+
+        self.add_calculated_table_column(
+            table_name=table_name,
+            column_name=table_name,
+            source_column="[Value1]",
+            data_type="String",
+            hidden=False,
+        )
+        self.add_calculated_table_column(
+            table_name=table_name,
+            column_name=col2,
+            source_column="[Value2]",
+            data_type="String",
+            hidden=True,
+        )
+        self.add_calculated_table_column(
+            table_name=table_name,
+            column_name=col3,
+            source_column="[Value3]",
+            data_type="Int64",
+            hidden=True,
+        )
+
+        self.set_extended_property(
+            object=self.model.Tables[table_name].Columns[col2],
+            extended_property_type="Json",
+            name="ParameterMetadata",
+            value='{"version":3,"kind":2}',
+        )
+
+        # Local import (matching the other methods) so the TOM types below resolve.
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        rcd = TOM.RelatedColumnDetails()
+        gpc = TOM.GroupByColumn()
+        gpc.GroupingColumn = self.model.Tables[table_name].Columns[col2]
+        rcd.GroupByColumns.Add(gpc)
+
+        # Update column properties
+        self.model.Tables[table_name].Columns[col2].SortByColumn = self.model.Tables[
+            table_name
+        ].Columns[col3]
+        self.model.Tables[table_name].Columns[table_name].RelatedColumnDetails = rcd
+
+        self.tables_added.append(table_name)
+
+    def remove_vertipaq_annotations(self):
+        """
+        Removes the annotations set using the [set_vertipaq_annotations] function.
+        """
+
+        for t in self.model.Tables:
+            for a in t.Annotations:
+                if a.Name.startswith("Vertipaq_"):
+                    self.remove_annotation(object=t, name=a.Name)
+            for c in t.Columns:
+                for a in c.Annotations:
+                    if a.Name.startswith("Vertipaq_"):
+                        self.remove_annotation(object=c, name=a.Name)
+            for h in t.Hierarchies:
+                for a in h.Annotations:
+                    if a.Name.startswith("Vertipaq_"):
+                        self.remove_annotation(object=h, name=a.Name)
+            for p in t.Partitions:
+                for a in p.Annotations:
+                    if a.Name.startswith("Vertipaq_"):
+                        self.remove_annotation(object=p, name=a.Name)
+        for r in self.model.Relationships:
+            for a in r.Annotations:
+                if a.Name.startswith("Vertipaq_"):
+                    self.remove_annotation(object=r, name=a.Name)
+
+    def set_vertipaq_annotations(self):
+        """
+        Saves Vertipaq Analyzer statistics as annotations on objects in the semantic model.
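+
+        The statistics are saved as annotations whose names are prefixed with
+        'Vertipaq_' (for example 'Vertipaq_RowCount' on tables); they can be
+        removed later with the remove_vertipaq_annotations function.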
+ """ + + dfT = fabric.list_tables( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfC = fabric.list_columns( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + # intList = ['Total Size']#, 'Data Size', 'Dictionary Size', 'Hierarchy Size'] + dfCSum = dfC.groupby(["Table Name"])["Total Size"].sum().reset_index() + dfTable = pd.merge( + dfT[["Name", "Type", "Row Count"]], + dfCSum[["Table Name", "Total Size"]], + left_on="Name", + right_on="Table Name", + how="inner", + ) + dfP = fabric.list_partitions( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfP["Records per Segment"] = round( + dfP["Record Count"] / dfP["Segment Count"], 2 + ) + dfH = fabric.list_hierarchies( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + dfR = list_relationships( + dataset=self.dataset, workspace=self.workspace, extended=True + ) + + for t in self.model.Tables: + dfT_filt = dfTable[dfTable["Name"] == t.Name] + rowCount = str(dfT_filt["Row Count"].iloc[0]) + totalSize = str(dfT_filt["Total Size"].iloc[0]) + self.set_annotation(object=t, name="Vertipaq_RowCount", value=rowCount) + self.set_annotation(object=t, name="Vertipaq_TableSize", value=totalSize) + for c in t.Columns: + dfC_filt = dfC[ + (dfC["Table Name"] == t.Name) & (dfC["Column Name"] == c.Name) + ] + totalSize = str(dfC_filt["Total Size"].iloc[0]) + dataSize = str(dfC_filt["Data Size"].iloc[0]) + dictSize = str(dfC_filt["Dictionary Size"].iloc[0]) + hierSize = str(dfC_filt["Hierarchy Size"].iloc[0]) + card = str(dfC_filt["Column Cardinality"].iloc[0]) + self.set_annotation( + object=c, name="Vertipaq_TotalSize", value=totalSize + ) + self.set_annotation(object=c, name="Vertipaq_DataSize", value=dataSize) + self.set_annotation( + object=c, name="Vertipaq_DictionarySize", value=dictSize + ) + self.set_annotation( + object=c, name="Vertipaq_HierarchySize", value=hierSize + ) + self.set_annotation(object=c, name="Vertipaq_Cardinality", value=card) + for p in t.Partitions: + dfP_filt = dfP[ + (dfP["Table Name"] == t.Name) & (dfP["Partition Name"] == p.Name) + ] + recordCount = str(dfP_filt["Record Count"].iloc[0]) + segmentCount = str(dfP_filt["Segment Count"].iloc[0]) + rpS = str(dfP_filt["Records per Segment"].iloc[0]) + self.set_annotation( + object=p, name="Vertipaq_RecordCount", value=recordCount + ) + self.set_annotation( + object=p, name="Vertipaq_SegmentCount", value=segmentCount + ) + self.set_annotation( + object=p, name="Vertipaq_RecordsPerSegment", value=rpS + ) + for h in t.Hierarchies: + dfH_filt = dfH[ + (dfH["Table Name"] == t.Name) & (dfH["Hierarchy Name"] == h.Name) + ] + usedSize = str(dfH_filt["Used Size"].iloc[0]) + self.set_annotation(object=h, name="Vertipaq_UsedSize", value=usedSize) + for r in self.model.Relationships: + dfR_filt = dfR[dfR["Relationship Name"] == r.Name] + relSize = str(dfR_filt["Used Size"].iloc[0]) + self.set_annotation(object=r, name="Vertipaq_UsedSize", value=relSize) + + try: + runId = self.get_annotation_value(object=self.model, name="Vertipaq_Run") + runId = str(int(runId) + 1) + except: + runId = "1" + self.set_annotation(object=self.model, name="Vertipaq_Run", value=runId) + + def row_count(self, object: Union["TOM.Partition", "TOM.Table"]): + """ + Obtains the row count of a table or partition within a semantic model. + + Parameters + ---------- + object : TOM Object + The table/partition object within the semantic model. + + Returns + ------- + int + Number of rows within the TOM object. 
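+
+        Examples
+        --------
+        A minimal sketch ('Sales' is a placeholder table name); assumes
+        set_vertipaq_annotations has already been run so the annotation exists:
+
+        >>> tom.row_count(object=tom.model.Tables["Sales"])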
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Table: + result = self.get_annotation_value(object=object, name="Vertipaq_RowCount") + elif objType == TOM.ObjectType.Partition: + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordCount" + ) + + return int(result) + + def records_per_segment(self, object: "TOM.Partition"): + """ + Obtains the records per segment of a partition within a semantic model. + + Parameters + ---------- + object : TOM Object + The partition object within the semantic model. + + Returns + ------- + float + Number of records per segment within the partition. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Partition: + result = self.get_annotation_value( + object=object, name="Vertipaq_RecordsPerSegment" + ) + + return float(result) + + def used_size(self, object: Union["TOM.Hierarchy", "TOM.Relationship"]): + """ + Obtains the used size of a hierarchy or relationship within a semantic model. + + Parameters + ---------- + object : TOM Object + The hierarhcy/relationship object within the semantic model. + + Returns + ------- + int + Used size of the TOM object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Hierarchy: + result = self.get_annotation_value(object=object, name="Vertipaq_UsedSize") + elif objType == TOM.ObjectType.Relationship: + result = self.get_annotation_value(object=object, name="Vertipaq_UsedSize") + + return int(result) + + def data_size(self, column: "TOM.Column"): + """ + Obtains the data size of a column within a semantic model. + + Parameters + ---------- + column : TOM Object + The column object within the semantic model. + + Returns + ------- + int + Data size of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value(object=column, name="Vertipaq_DataSize") + + return int(result) + + def dictionary_size(self, column: "TOM.Column"): + """ + Obtains the dictionary size of a column within a semantic model. + + Parameters + ---------- + column : TOM Object + The column object within the semantic model. + + Returns + ------- + int + Dictionary size of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value( + object=column, name="Vertipaq_DictionarySize" + ) + + return int(result) + + def total_size(self, object: Union["TOM.Table", "TOM.Column"]): + """ + Obtains the data size of a table/column within a semantic model. + + Parameters + ---------- + object : TOM Object + The table/column object within the semantic model. + + Returns + ------- + int + Total size of the TOM table/column. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value(object=object, name="Vertipaq_TotalSize") + elif objType == TOM.ObjectType.Table: + result = self.get_annotation_value(object=object, name="Vertipaq_TotalSize") + + return int(result) + + def cardinality(self, column: "TOM.Column"): + """ + Obtains the cardinality of a column within a semantic model. + + Parameters + ---------- + column : TOM Object + The column object within the semantic model. 
+ + Returns + ------- + int + Cardinality of the TOM column. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = column.ObjectType + + if objType == TOM.ObjectType.Column: + result = self.get_annotation_value( + object=column, name="Vertipaq_Cardinality" + ) + + return int(result) + + def depends_on(self, object, dependencies: pd.DataFrame): + """ + Obtains the objects on which the specified object depends. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + Objects on which the specified object depends. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + objName = object.Name + objParentName = object.Parent.Name + + if objType == TOM.ObjectType.Table: + objParentName = objName + + fil = dependencies[ + (dependencies["Object Type"] == objType) + & (dependencies["Table Name"] == objParentName) + & (dependencies["Object Name"] == objName) + ] + meas = ( + fil[fil["Referenced Object Type"] == "Measure"]["Referenced Object"] + .unique() + .tolist() + ) + cols = ( + fil[fil["Referenced Object Type"] == "Column"][ + "Referenced Full Object Name" + ] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Referenced Object Type"] == "Table"]["Referenced Table"] + .unique() + .tolist() + ) + for m in self.all_measures(): + if m.Name in meas: + yield m + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + for t in self.model.Tables: + if t.Name in tbls: + yield t + + def referenced_by(self, object, dependencies: pd.DataFrame): + """ + Obtains the objects which reference the specified object. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.TableCollection, Microsoft.AnalysisServices.Tabular.ColumnCollection, Microsoft.AnalysisServices.Tabular.MeasureCollection + Objects which reference the specified object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + objType = object.ObjectType + objName = object.Name + objParentName = object.Parent.Name + + if objType == TOM.ObjectType.Table: + objParentName = objName + + fil = dependencies[ + (dependencies["Referenced Object Type"] == objType) + & (dependencies["Referenced Table"] == objParentName) + & (dependencies["Referenced Object"] == objName) + ] + meas = fil[fil["Object Type"] == "Measure"]["Object Name"].unique().tolist() + cols = ( + fil[fil["Object Type"].isin(["Column", "Calc Column"])]["Full Object Name"] + .unique() + .tolist() + ) + tbls = ( + fil[fil["Object Type"].isin(["Table", "Calc Table"])]["Table Name"] + .unique() + .tolist() + ) + for m in self.all_measures(): + if m.Name in meas: + yield m + for c in self.all_columns(): + if format_dax_object_name(c.Parent.Name, c.Name) in cols: + yield c + for t in self.model.Tables: + if t.Name in tbls: + yield t + + def fully_qualified_measures( + self, object: "TOM.Measure", dependencies: pd.DataFrame + ): + """ + Obtains all fully qualified measure references for a given object. 
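+
+        A fully qualified reference is one that includes the table name,
+        i.e. 'Table Name'[Measure Name], within the object's DAX expression.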
+ + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.MeasureCollection + All fully qualified measure references for a given object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + for obj in self.depends_on(object=object, dependencies=dependencies): + if obj.ObjectType == TOM.ObjectType.Measure: + if (obj.Parent.Name + obj.Name in object.Expression) or ( + format_dax_object_name(obj.Parent.Name, obj.Name) + in object.Expression + ): + yield obj + + def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): + """ + Obtains all unqualified column references for a given object. + + Parameters + ---------- + object : TOM Object + The TOM object within the semantic model. + dependencies : pandas.DataFrame + A pandas dataframe with the output of the 'get_model_calc_dependencies' function. + + Returns + ------- + Microsoft.AnalysisServices.Tabular.ColumnCollection + All unqualified column references for a given object. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + def create_pattern(a, b): + return r"(? 0: + usingView = True + + return usingView + + def has_incremental_refresh_policy(self, table_name: str): + """ + Identifies whether a table has an incremental refresh policy. + + Parameters + ---------- + table_name : str + Name of the table. + + Returns + ------- + bool + An indicator whether a table has an incremental refresh policy. + """ + + hasRP = False + rp = self.model.Tables[table_name].RefreshPolicy + + if rp is not None: + hasRP = True + + return hasRP + + def show_incremental_refresh_policy(self, table_name: str): + """ + Prints the incremental refresh policy for a table. + + Parameters + ---------- + table_name : str + Name of the table. + """ + + rp = self.model.Tables[table_name].RefreshPolicy + + if rp is None: + print( + f"The '{table_name}' table in the '{self.dataset}' semantic model within the '{self.workspace}' workspace does not have an incremental refresh policy." + ) + else: + print(f"Table Name: {table_name}") + rwGran = str(rp.RollingWindowGranularity).lower() + icGran = str(rp.IncrementalGranularity).lower() + if rp.RollingWindowPeriods > 1: + print( + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}s{end_bold} before refresh date." + ) + else: + print( + f"Archive data starting {start_bold}{rp.RollingWindowPeriods} {rwGran}{end_bold} before refresh date." + ) + if rp.IncrementalPeriods > 1: + print( + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}s{end_bold} before refresh date." + ) + else: + print( + f"Incrementally refresh data {start_bold}{rp.IncrementalPeriods} {icGran}{end_bold} before refresh date." 
+ ) + + if rp.Mode == TOM.RefreshPolicyMode.Hybrid: + print( + f"{checked} Get the latest data in real time with DirectQuery (Premium only)" + ) + else: + print( + f"{unchecked} Get the latest data in real time with DirectQuery (Premium only)" + ) + if rp.IncrementalPeriodsOffset == -1: + print(f"{checked} Only refresh complete days") + else: + print(f"{unchecked} Only refresh complete days") + if len(rp.PollingExpression) > 0: + pattern = r"\[([^\]]+)\]" + match = re.search(pattern, rp.PollingExpression) + if match: + col = match[0][1:-1] + fullCol = format_dax_object_name(table_name, col) + print( + f"{checked} Detect data changes: {start_bold}{fullCol}{end_bold}" + ) + else: + print(f"{unchecked} Detect data changes") + + def update_incremental_refresh_policy( + self, + table_name: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): + """ + Updates the incremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + incremental_granularity : str + Granularity of the (most recent) incremental refresh range. + incremental_periods : int + Number of periods for the incremental refresh range. + rolling_window_granularity : str + Target granularity of the rolling window for the whole semantic model. + rolling_window_periods : int + Number of periods for the rolling window for the whole semantic model. + only_refresh_complete_days : bool, default=False + Lag or leading periods from Now() to the rolling window head. + detect_data_changes_column : str, default=None + The column to use for detecting data changes. + Defaults to None which resolves to not detecting data changes. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if not self.has_incremental_refresh_policy(table_name=table_name): + print( + f"The '{table_name}' table does not have an incremental refresh policy." + ) + return + + incGran = ["Day", "Month", "Quarter", "Year"] + + incremental_granularity = incremental_granularity.capitalize() + rolling_window_granularity = rolling_window_granularity.capitalize() + + if incremental_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + ) + return + if rolling_window_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + ) + return + + if rolling_window_periods < 1: + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + ) + return + if incremental_periods < 1: + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." + ) + return + + t = self.model.Tables[table_name] + + if detect_data_changes_column is not None: + dc = t.Columns[detect_data_changes_column] + + if dc.DataType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." 
+ ) + return + + rp = TOM.BasicRefreshPolicy() + rp.IncrementalPeriods = incremental_periods + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) + rp.RollingWindowPeriods = rolling_window_periods + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) + rp.SourceExpression = t.RefreshPolicy.SourceExpression + + if only_refresh_complete_days: + rp.IncrementalPeriodsOffset = -1 + else: + rp.IncrementalPeriodOffset = 0 + + if detect_data_changes_column is not None: + fullDC = format_dax_object_name(table_name, detect_data_changes_column) + ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" + rp.PollingExpression = ddcExpr + else: + rp.PollingExpression = None + + t.RefreshPolicy = rp + + self.show_incremental_refresh_policy(table_name=table_name) + + def add_incremental_refresh_policy( + self, + table_name: str, + column_name: str, + start_date: str, + end_date: str, + incremental_granularity: str, + incremental_periods: int, + rolling_window_granularity: str, + rolling_window_periods: int, + only_refresh_complete_days: Optional[bool] = False, + detect_data_changes_column: Optional[str] = None, + ): + """ + Adds anincremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + The DateTime column to be used for the RangeStart and RangeEnd parameters. + start_date : str + The date to be used for the RangeStart parameter. + end_date : str + The date to be used for the RangeEnd parameter. + incremental_granularity : str + Granularity of the (most recent) incremental refresh range. + incremental_periods : int + Number of periods for the incremental refresh range. + rolling_window_granularity : str + Target granularity of the rolling window for the whole semantic model. + rolling_window_periods : int + Number of periods for the rolling window for the whole semantic model. + only_refresh_complete_days : bool, default=False + Lag or leading periods from Now() to the rolling window head. + detect_data_changes_column : str, default=None + The column to use for detecting data changes. + Defaults to None which resolves to not detecting data changes. + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + # https://learn.microsoft.com/en-us/power-bi/connect-data/incremental-refresh-configure + + incGran = ["Day", "Month", "Quarter", "Year"] + + incremental_granularity = incremental_granularity.capitalize() + rolling_window_granularity = rolling_window_granularity.capitalize() + + if incremental_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + ) + return + if rolling_window_granularity not in incGran: + print( + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + ) + return + + if rolling_window_periods < 1: + print( + f"{icons.red_dot} Invalid 'rolling_window_periods' value. Must be a value greater than 0." + ) + return + if incremental_periods < 1: + print( + f"{icons.red_dot} Invalid 'incremental_periods' value. Must be a value greater than 0." 
+ ) + return + + date_format = "%m/%d/%Y" + + date_obj_start = datetime.strptime(start_date, date_format) + start_year = date_obj_start.year + start_month = date_obj_start.month + start_day = date_obj_start.day + + date_obj_end = datetime.strptime(end_date, date_format) + end_year = date_obj_end.year + end_month = date_obj_end.month + end_day = date_obj_end.day + + if date_obj_end <= date_obj_start: + print( + f"{icons.red_dot} Invalid 'start_date' or 'end_date'. The 'end_date' must be after the 'start_date'." + ) + return + + t = self.model.Tables[table_name] + + c = t.Columns[column_name] + fcName = format_dax_object_name(table_name, column_name) + dType = c.DataType + + if dType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} The {fcName} column is of '{dType}' data type. The column chosen must be of DateTime data type." + ) + return + + if detect_data_changes_column is not None: + dc = t.Columns[detect_data_changes_column] + dcType = dc.DataType + + if dcType != TOM.DataType.DateTime: + print( + f"{icons.red_dot} Invalid 'detect_data_changes_column' parameter. This column must be of DateTime data type." + ) + return + + # Start changes: + + # Update partition expression + i = 0 + for p in t.Partitions: + if p.SourceType != TOM.PartitionSourceType.M: + print( + f"{icons.red_dot} Invalid partition source type. Incremental refresh can only be set up if the table's partition is an M-partition." + ) + return + elif i == 0: + text = p.Expression + text = text.rstrip() + + ind = text.rfind(" ") + 1 + obj = text[ind:] + pattern = r"in\s*[^ ]*" + matches = list(re.finditer(pattern, text)) + + if matches: + last_match = matches[-1] + text_before_last_match = text[: last_match.start()] + + print(text_before_last_match) + else: + print(f"{icons.red_dot} Invalid M-partition expression.") + return + + endExpr = f'#"Filtered Rows IR" = Table.SelectRows({obj}, each [{column_name}] >= RangeStart and [{column_name}] <= RangeEnd)\n#"Filtered Rows IR"' + finalExpr = text_before_last_match + endExpr + + p.Expression = finalExpr + i += 1 + + # Add expressions + self.add_expression( + name="RangeStart", + expression=f'datetime({start_year}, {start_month}, {start_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) + self.add_expression( + name="RangeEnd", + expression=f'datetime({end_year}, {end_month}, {end_day}, 0, 0, 0) meta [IsParameterQuery=true, Type="DateTime", IsParameterQueryRequired=true]', + ) + + # Update properties + rp = TOM.BasicRefreshPolicy() + rp.IncrementalPeriods = incremental_periods + rp.IncrementalGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, incremental_granularity + ) + rp.RollingWindowPeriods = rolling_window_periods + rp.RollingWindowGranularity = System.Enum.Parse( + TOM.RefreshGranularityType, rolling_window_granularity + ) + + if only_refresh_complete_days: + rp.IncrementalPeriodsOffset = -1 + else: + rp.IncrementalPeriodOffset = 0 + + if detect_data_changes_column is not None: + fullDC = format_dax_object_name(table_name, detect_data_changes_column) + ddcExpr = f"let Max{detect_data_changes_column} = List.Max({fullDC}), accountForNull = if Max{detect_data_changes_column} = null then #datetime(1901, 01, 01, 00, 00, 00) else Max{detect_data_changes_column} in accountForNull" + rp.PollingExpression = ddcExpr + + t.RefreshPolicy = rp + + self.show_incremental_refresh_policy(table_name=table_name) + + def apply_refresh_policy( + self, + table_name: str, + effective_date: Optional[datetime] = None, + refresh: 
Optional[bool] = True, + max_parallelism: Optional[int] = 0, + ): + """ + Applies the incremental refresh policy for a table within a semantic model. + + Parameters + ---------- + table_name : str + Name of the table. + effective_date : DateTime, default=None + The effective date that is used when calculating the partitioning scheme. + refresh : bool, default=True + An indication if partitions of the table should be refreshed or not; the default behavior is to do the refresh. + max_parallelism : int, default=0 + The degree of parallelism during the refresh execution. + """ + + self.model.Tables[table_name].ApplyRefreshPolicy( + effectiveDate=effective_date, + refresh=refresh, + maxParallelism=max_parallelism, + ) + + def set_data_coverage_definition( + self, table_name: str, partition_name: str, expression: str + ): + """ + Sets the data coverage definition for a partition. + + Parameters + ---------- + table_name : str + Name of the table. + partition_name : str + Name of the partition. + expression : str + DAX expression containing the logic for the data coverage definition. + """ + import Microsoft.AnalysisServices.Tabular as TOM + + doc = "https://learn.microsoft.com/analysis-services/tom/table-partitions?view=asallproducts-allversions" + + t = self.model.Tables[table_name] + p = t.Partitions[partition_name] + + ht = self.is_hybrid_table(table_name=table_name) + + if not ht: + print( + f"The data coverage definition property is only applicable to hybrid tables. See the documentation: {doc}." + ) + return + if p.Mode != TOM.ModeType.DirectQuery: + print( + f"The data coverage definition property is only applicable to the DirectQuery partition of a hybrid table. See the documentation: {doc}." + ) + return + + dcd = TOM.DataCoverageDefinition() + dcd.Expression = expression + p.DataCoverageDefinition = dcd + + def set_encoding_hint(self, table_name: str, column_name: str, value: str): + """ + Sets the encoding hint for a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : str + Encoding hint value. + `Encoding hint valid values `_ + """ + import Microsoft.AnalysisServices.Tabular as TOM + import System + + values = ["Default", "Hash", "Value"] + value = value.capitalize() + + if value not in values: + print( + f"{icons.red_dot} Invalid encoding hint value. Please choose from these options: {values}." + ) + return + + self.model.Tables[table_name].Columns[column_name].EncodingHint = ( + System.Enum.Parse(TOM.EncodingHintType, value) + ) + + def set_data_type(self, table_name: str, column_name: str, value: str): + """ + Sets the data type for a column. + + Parameters + ---------- + table_name : str + Name of the table. + column_name : str + Name of the column. + value : str + The data type. + `Data type valid values `_ + """ + import System + + values = [ + "Binary", + "Boolean", + "DateTime", + "Decimal", + "Double", + "Int64", + "String", + ] + + value = value.replace(" ", "").capitalize() + if value == "Datetime": + value = "DateTime" + elif value.startswith("Int"): + value = "Int64" + elif value.startswith("Bool"): + value = "Boolean" + + if value not in values: + print( + f"{icons.red_dot} Invalid data type. Please choose from these options: {values}." 
+ ) + return + + self.model.Tables[table_name].Columns[column_name].DataType = System.Enum.Parse( + TOM.DataType, value + ) + + def add_time_intelligence( + self, measure_name: str, date_table: str, time_intel: Union[str, List[str]] + ): + """ + Adds time intelligence measures + + Parameters + ---------- + measure_name : str + Name of the measure + date_table : str + Name of the date table. + time_intel : str, List[str] + Time intelligence measures to create (i.e. MTD, YTD, QTD). + """ + + table_name = None + time_intel_options = ["MTD", "QTD", "YTD"] + + if isinstance(time_intel, str): + time_intel = [time_intel] + + # Validate time intelligence variations + for t in time_intel: + t = t.capitalize() + if t not in [time_intel_options]: + print( + f"The '{t}' time intelligence variation is not supported. Valid options: {time_intel_options}." + ) + return + + # Validate measure and extract table name + for m in self.all_measures(): + if m.Name == measure_name: + table_name = m.Parent.Name + + if table_name is None: + print( + f"The '{measure_name}' is not a valid measure in the '{self.dataset}' semantic model within the '{self.workspace}' workspace." + ) + return + + # Validate date table + if not self.is_date_table(date_table): + print( + f"{icons.red_dot} The '{date_table}' table is not a valid date table in the '{self.dataset}' wemantic model within the '{self.workspace}' workspace." + ) + return + + # Extract date key from date table + for c in self.all_columns(): + if c.Parent.Name == date_table and c.IsKey: + date_key = c.Name + + # Create the new time intelligence measures + for t in time_intel: + if t == "MTD": + expr = f"CALCULATE([{measure_name}],DATES{time_intel}('{date_table}'[{date_key}]))" + new_meas_name = f"{measure_name} {t}" + self.add_measure( + table_name=table_name, + measure_name=new_meas_name, + expression=expr, + ) + + def close(self): + if not self.readonly and self.model is not None: + self.model.SaveChanges() + + if len(self.tables_added) > 0: + refresh_semantic_model( + dataset=self.dataset, + tables=self.tables_added, + workspace=self.workspace, + ) + self.model = None + + self.tom_server.Dispose() + + +@log +@contextmanager +def connect_semantic_model( + dataset: str, readonly: bool = True, workspace: Optional[str] = None +): + """ + Connects to the Tabular Object Model (TOM) within a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + readonly: bool, default=True + Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str + A connection to the semantic model's Tabular Object Model. 
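+
+    Examples
+    --------
+    A minimal usage sketch; 'AdventureWorks' is a placeholder dataset name and
+    'My New Table' a placeholder table name. With readonly=False the changes
+    are saved back to the model when the context manager exits:
+
+    >>> with connect_semantic_model(dataset="AdventureWorks", readonly=False) as tom:
+    ...     tom.add_table(name="My New Table")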
+ """ + + # initialize .NET to make sure System and Microsoft.AnalysisServices.Tabular is defined + sempy.fabric._client._utils._init_analysis_services() + + if workspace is None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + tw = TOMWrapper(dataset=dataset, workspace=workspace, readonly=readonly) + try: + yield tw + finally: + tw.close() diff --git a/src/sempy_labs/_translations.py b/src/sempy_labs/_translations.py new file mode 100644 index 00000000..5db33bc4 --- /dev/null +++ b/src/sempy_labs/_translations.py @@ -0,0 +1,378 @@ +import pandas as pd +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def translate_semantic_model( + dataset: str, + languages: Union[str, List[str]], + exclude_characters: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Translates names, descriptions, display folders for all objects in a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + languages : str, List[str] + The language code(s) in which to translate the semantic model. + exclude_characters : str + A string specifying characters which will be replaced by a space in the translation text when sent to the translation service. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + + """ + + from synapse.ml.services import Translate + from pyspark.sql.functions import col, flatten + from pyspark.sql import SparkSession + from ._tom import connect_semantic_model + + if isinstance(languages, str): + languages = [languages] + + dfPrep = pd.DataFrame( + columns=["Object Type", "Name", "Description", "Display Folder"] + ) + + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: + + if exclude_characters is None: + for o in tom.model.Tables: + new_data = { + "Object Type": "Table", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": None, + "TDisplay Folder": None, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_columns(): + new_data = { + "Object Type": "Column", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_measures(): + new_data = { + "Object Type": "Measure", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_hierarchies(): + new_data = { + "Object Type": "Hierarchy", + "Name": o.Name, + "TName": o.Name, + "Description": o.Description, + "TDescription": o.Description, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": o.DisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + for o in tom.model.Tables: + oName = o.Name + oDescription = o.Description + for s in exclude_characters: + oName = oName.replace(s, " ") + 
oDescription = oDescription.replace(s, " ") + new_data = { + "Object Type": "Table", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": None, + "TDisplay Folder": None, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_columns(): + oName = o.Name + oDescription = o.Description + oDisplayFolder = o.DisplayFolder + for s in exclude_characters: + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Column", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_measures(): + oName = o.Name + oDescription = o.Description + oDisplayFolder = o.DisplayFolder + for s in exclude_characters: + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Measure", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for o in tom.all_hierarchies(): + oName = o.Name + oDescription = o.Description + oDisplayFolder = o.DisplayFolder + for s in exclude_characters: + oName = oName.replace(s, " ") + oDescription = oDescription.replace(s, " ") + oDisplayFolder = oDisplayFolder.replace(s, " ") + new_data = { + "Object Type": "Hierarchy", + "Name": o.Name, + "TName": oName, + "Description": o.Description, + "TDescription": oDescription, + "Display Folder": o.DisplayFolder, + "TDisplay Folder": oDisplayFolder, + } + dfPrep = pd.concat( + [dfPrep, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + spark = SparkSession.builder.getOrCreate() + df = spark.createDataFrame(dfPrep) + + columns = ["Name", "Description", "Display Folder"] + + for clm in columns: + columnToTranslate = f"T{clm}" + translate = ( + Translate() + .setTextCol(columnToTranslate) + .setToLanguage(languages) + .setOutputCol("translation") + .setConcurrency(5) + ) + + transDF = ( + translate.transform(df) + .withColumn("translation", flatten(col("translation.translations"))) + .withColumn("translation", col("translation.text")) + .select("Object Type", clm, columnToTranslate, "translation") + ) + + df_panda = transDF.toPandas() + print(f"{icons.in_progress} Translating {clm}s...") + + for lang in languages: + i = languages.index(lang) + tom.add_translation(language=lang) + print(f"{icons.in_progress} Translating into the '{lang}' language...") + + for t in tom.model.Tables: + if t.IsHidden == False: + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Table") + & (df_panda["Name"] == t.Name) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=t, language=lang, property="Name", value=tr + ) + print( + f"{icons.green_dot} Translation '{tr}' set for the '{lang}' language on the '{t.Name}' table." 
+ ) + elif clm == "Description" and t.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Table") + & (df_panda["Description"] == t.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=t, + language=lang, + property="Description", + value=tr, + ) + for c in t.Columns: + if c.IsHidden == False: + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Name"] == c.Name) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Name", + value=tr, + ) + print( + f"{icons.green_dot} Translation '{tr}' set on the '{c.Name}' column within the {t.Name}' table." + ) + elif clm == "Description" and c.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Description"] == c.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" + and c.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Column") + & (df_panda["Display Folder"] == c.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=c, + language=lang, + property="Display Folder", + value=tr, + ) + for h in t.Hierarchies: + if h.IsHidden == False: + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Name"] == h.Name) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Name", + value=tr, + ) + elif clm == "Description" and h.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Description"] == h.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" + and h.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Hierarchy") + & (df_panda["Display Folder"] == h.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=h, + language=lang, + property="Display Folder", + value=tr, + ) + for ms in t.Measures: + if ms.IsHidden == False: + if clm == "Name": + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Name"] == ms.Name) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Name", + value=tr, + ) + print( + f"{icons.green_dot} Translation '{tr}' set on the '{ms.Name}' column within the {t.Name}' table." 
+ ) + elif clm == "Description" and ms.Description is not None: + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Description"] == ms.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Description", + value=tr, + ) + elif ( + clm == "Display Folder" and ms.DisplayFolder is not None + ): + df_filt = df_panda[ + (df_panda["Object Type"] == "Measure") + & (df_panda["Display Folder"] == ms.Description) + ] + if len(df_filt) == 1: + tr = df_filt["translation"].str[i].iloc[0] + tom.set_translation( + object=ms, + language=lang, + property="Display Folder", + value=tr, + ) diff --git a/src/sempy_labs/_vertipaq.py b/src/sempy_labs/_vertipaq.py new file mode 100644 index 00000000..c73dded0 --- /dev/null +++ b/src/sempy_labs/_vertipaq.py @@ -0,0 +1,893 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from IPython.display import display, HTML +import zipfile, os, shutil, datetime, warnings +from pyspark.sql import SparkSession +from sempy_labs._helper_functions import ( + format_dax_object_name, + get_direct_lake_sql_endpoint, + resolve_lakehouse_name, +) +from sempy_labs._list_functions import list_relationships +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from typing import List, Optional, Union +from sempy._utils._log import log + + +@log +def vertipaq_analyzer( + dataset: str, + workspace: Optional[str] = None, + export: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, + read_stats_from_data: Optional[bool] = False, +): + """ + Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name in which the semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + export : str, default=None + Specifying 'zip' will export the results to a zip file in your lakehouse (which can be imported using the import_vertipaq_analyzer function. + Specifying 'table' will export the results to delta tables (appended) in your lakehouse. + Default value: None. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse (for Direct Lake semantic models). + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + read_stats_from_data : bool, default=False + Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse). 
+ + Returns + ------- + + """ + + pd.options.mode.copy_on_write = True + warnings.filterwarnings( + "ignore", message="createDataFrame attempted Arrow optimization*" + ) + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if lakehouse_workspace == None: + lakehouse_workspace = workspace + + dfT = fabric.list_tables(dataset=dataset, extended=True, workspace=workspace) + dfT.rename(columns={"Name": "Table Name"}, inplace=True) + dfC = fabric.list_columns(dataset=dataset, extended=True, workspace=workspace) + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfC.rename(columns={"Column Cardinality": "Cardinality"}, inplace=True) + dfH = fabric.list_hierarchies(dataset=dataset, extended=True, workspace=workspace) + dfR = list_relationships(dataset=dataset, extended=True, workspace=workspace) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + dfP = fabric.list_partitions(dataset=dataset, extended=True, workspace=workspace) + dfD = fabric.list_datasets( + workspace=workspace, + additional_xmla_properties=["CompatibilityLevel", "Model.DefaultMode"], + ) + dfD = dfD[dfD["Dataset Name"] == dataset] + dfD["Compatibility Level"] = dfD["Compatibility Level"].astype(int) + isDirectLake = any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()) + dfR["Missing Rows"] = None + + # Direct Lake + if read_stats_from_data: + if isDirectLake: + dfC = pd.merge( + dfC, + dfP[["Table Name", "Query", "Source Type"]], + on="Table Name", + how="left", + ) + dfC_flt = dfC[ + (dfC["Source Type"] == "Entity") + & (~dfC["Column Name"].str.startswith("RowNumber-")) + ] + sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) + + # Get lakehouse name from SQL Endpoint ID + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] + + if len(dfI_filt) == 0: + print( + f"The lakehouse (SQL Endpoint) used by the '{dataset}' semantic model does not reside in the '{lakehouse_workspace}' workspace. Please update the lakehouse_workspace parameter." 
+ ) + else: + lakehouseName = dfI_filt["Display Name"].iloc[0] + + current_workspace_id = fabric.get_workspace_id() + current_workspace = fabric.resolve_workspace_name(current_workspace_id) + if current_workspace != lakehouse_workspace: + lakeTables = get_lakehouse_tables( + lakehouse=lakehouseName, workspace=lakehouse_workspace + ) + + sql_statements = [] + spark = SparkSession.builder.getOrCreate() + # Loop through tables + for lakeTName in dfC_flt["Query"].unique(): + query = "SELECT " + columns_in_table = dfC_flt.loc[ + dfC_flt["Query"] == lakeTName, "Source" + ].unique() + + # Loop through columns within those tables + for scName in columns_in_table: + query = query + f"COUNT(DISTINCT({scName})) AS {scName}, " + + query = query[:-2] + if lakehouse_workspace == current_workspace: + query = query + f" FROM {lakehouseName}.{lakeTName}" + else: + lakeTables_filt = lakeTables[ + lakeTables["Table Name"] == lakeTName + ] + tPath = lakeTables_filt["Location"].iloc[0] + + df = spark.read.format("delta").load(tPath) + tempTableName = "delta_table_" + lakeTName + df.createOrReplaceTempView(tempTableName) + query = query + f" FROM {tempTableName}" + sql_statements.append((lakeTName, query)) + + for o in sql_statements: + tName = o[0] + query = o[1] + + df = spark.sql(query) + + for column in df.columns: + x = df.collect()[0][column] + for i, r in dfC.iterrows(): + if r["Query"] == tName and r["Source"] == column: + dfC.at[i, "Cardinality"] = x + + # Remove column added temporarily + dfC.drop(columns=["Query", "Source Type"], inplace=True) + + # Direct Lake missing rows + dfR = pd.merge( + dfR, + dfP[["Table Name", "Query"]], + left_on="From Table", + right_on="Table Name", + how="left", + ) + dfR.rename(columns={"Query": "From Lake Table"}, inplace=True) + dfR.drop(columns=["Table Name"], inplace=True) + dfR = pd.merge( + dfR, + dfP[["Table Name", "Query"]], + left_on="To Table", + right_on="Table Name", + how="left", + ) + dfR.rename(columns={"Query": "To Lake Table"}, inplace=True) + dfR.drop(columns=["Table Name"], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Source"]], + left_on="From Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Source": "From Lake Column"}, inplace=True) + dfR.drop(columns=["Column Object"], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Source"]], + left_on="To Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Source": "To Lake Column"}, inplace=True) + dfR.drop(columns=["Column Object"], inplace=True) + + spark = SparkSession.builder.getOrCreate() + for i, r in dfR.iterrows(): + fromTable = r["From Lake Table"] + fromColumn = r["From Lake Column"] + toTable = r["To Lake Table"] + toColumn = r["To Lake Column"] + + if lakehouse_workspace == current_workspace: + query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" + else: + tempTableFrom = "delta_table_" + fromTable + tempTableTo = "delta_table_" + toTable + + query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {tempTableFrom} as f\nleft join {tempTableTo} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" + + # query = f"select count(f.{fromColumn}) as {fromColumn}\nfrom {fromTable} as f\nleft join {toTable} as c on f.{fromColumn} = c.{toColumn}\nwhere c.{toColumn} is null" + + df = spark.sql(query) + missingRows = df.collect()[0][0] + dfR.at[i, "Missing Rows"] = missingRows + + dfR["Missing 
Rows"] = dfR["Missing Rows"].astype(int) + else: + # Calculate missing rows using DAX for non-direct lake + for i, r in dfR.iterrows(): + fromTable = r["From Table"] + fromColumn = r["From Column"] + toTable = r["To Table"] + toColumn = r["To Column"] + isActive = bool(r["Active"]) + fromObject = format_dax_object_name(fromTable, fromColumn) + toObject = format_dax_object_name(toTable, toColumn) + + missingRows = 0 + + query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),isblank({toObject}))\n)" + + if isActive == False: # add userelationship + query = f"evaluate\nsummarizecolumns(\n\"1\",calculate(countrows('{fromTable}'),userelationship({fromObject},{toObject}),isblank({toObject}))\n)" + + result = fabric.evaluate_dax( + dataset=dataset, dax_string=query, workspace=workspace + ) + + try: + missingRows = result.iloc[0, 0] + except: + pass + + dfR.at[i, "Missing Rows"] = missingRows + dfR["Missing Rows"] = dfR["Missing Rows"].astype(int) + + dfTP = dfP.groupby("Table Name")["Partition Name"].count().reset_index() + dfTP.rename(columns={"Partition Name": "Partitions"}, inplace=True) + dfTC = dfC.groupby("Table Name")["Column Name"].count().reset_index() + dfTC.rename(columns={"Column Name": "Columns"}, inplace=True) + + total_size = dfC["Total Size"].sum() + table_sizes = dfC.groupby("Table Name")["Total Size"].sum().reset_index() + table_sizes.rename(columns={"Total Size": "Table Size"}, inplace=True) + + # Columns + dfC_filt = dfC[~dfC["Column Name"].str.startswith("RowNumber-")] + dfC_filt["% DB"] = round((dfC_filt["Total Size"] / total_size) * 100, 2) + dfC_filt = pd.merge(dfC_filt, table_sizes, on="Table Name", how="left") + dfC_filt["% Table"] = round( + (dfC_filt["Total Size"] / dfC_filt["Table Size"]) * 100, 2 + ) + columnList = [ + "Table Name", + "Column Name", + "Type", + "Cardinality", + "Total Size", + "Data Size", + "Dictionary Size", + "Hierarchy Size", + "% Table", + "% DB", + "Data Type", + "Encoding", + "Is Resident", + "Temperature", + "Last Accessed", + ] + + colSize = dfC_filt[columnList].sort_values(by="Total Size", ascending=False) + temp = dfC_filt[columnList].sort_values(by="Temperature", ascending=False) + colSize.reset_index(drop=True, inplace=True) + temp.reset_index(drop=True, inplace=True) + + export_Col = colSize.copy() + + intList = [ + "Cardinality", + "Total Size", + "Data Size", + "Dictionary Size", + "Hierarchy Size", + ] + pctList = ["% Table", "% DB"] + colSize[intList] = colSize[intList].applymap("{:,}".format) + temp[intList] = temp[intList].applymap("{:,}".format) + colSize[pctList] = colSize[pctList].applymap("{:.2f}%".format) + temp[pctList] = temp[pctList].applymap("{:.2f}%".format) + + # Tables + intList = ["Total Size", "Data Size", "Dictionary Size", "Hierarchy Size"] + dfCSum = dfC.groupby(["Table Name"])[intList].sum().reset_index() + dfCSum["% DB"] = round((dfCSum["Total Size"] / total_size) * 100, 2) + + dfTable = pd.merge( + dfT[["Table Name", "Type", "Row Count"]], dfCSum, on="Table Name", how="inner" + ) + dfTable = pd.merge(dfTable, dfTP, on="Table Name", how="left") + dfTable = pd.merge(dfTable, dfTC, on="Table Name", how="left") + dfTable = dfTable.drop_duplicates() # Drop duplicates (temporary) + dfTable = dfTable.sort_values(by="Total Size", ascending=False) + dfTable.reset_index(drop=True, inplace=True) + export_Table = dfTable.copy() + + intList.extend(["Row Count", "Partitions", "Columns"]) + dfTable[intList] = dfTable[intList].applymap("{:,}".format) + pctList = ["% DB"] + dfTable[pctList] = 
dfTable[pctList].applymap("{:.2f}%".format) + + ## Relationships + # dfR.drop(columns=['Max From Cardinality', 'Max To Cardinality'], inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Cardinality"]], + left_on="From Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Cardinality": "Max From Cardinality"}, inplace=True) + dfR = pd.merge( + dfR, + dfC[["Column Object", "Cardinality"]], + left_on="To Object", + right_on="Column Object", + how="left", + ) + dfR.rename(columns={"Cardinality": "Max To Cardinality"}, inplace=True) + dfR = dfR[ + [ + "From Object", + "To Object", + "Multiplicity", + "Used Size", + "Max From Cardinality", + "Max To Cardinality", + "Missing Rows", + ] + ].sort_values(by="Used Size", ascending=False) + dfR.reset_index(drop=True, inplace=True) + export_Rel = dfR.copy() + intList = [ + "Used Size", + "Max From Cardinality", + "Max To Cardinality", + "Missing Rows", + ] + if read_stats_from_data == False: + intList.remove("Missing Rows") + dfR[intList] = dfR[intList].applymap("{:,}".format) + + ## Partitions + dfP = dfP[ + ["Table Name", "Partition Name", "Mode", "Record Count", "Segment Count"] + ].sort_values( + by="Record Count", ascending=False + ) # , 'Records per Segment' + dfP["Records per Segment"] = round( + dfP["Record Count"] / dfP["Segment Count"], 2 + ) # Remove after records per segment is fixed + dfP.reset_index(drop=True, inplace=True) + export_Part = dfP.copy() + intList = ["Record Count", "Segment Count", "Records per Segment"] + dfP[intList] = dfP[intList].applymap("{:,}".format) + + ## Hierarchies + dfH_filt = dfH[dfH["Level Ordinal"] == 0] + dfH_filt = dfH_filt[["Table Name", "Hierarchy Name", "Used Size"]].sort_values( + by="Used Size", ascending=False + ) + dfH_filt.reset_index(drop=True, inplace=True) + export_Hier = dfH_filt.copy() + intList = ["Used Size"] + dfH_filt[intList] = dfH_filt[intList].applymap("{:,}".format) + + ## Model + if total_size >= 1000000000: + y = total_size / (1024**3) * 1000000000 + elif total_size >= 1000000: + y = total_size / (1024**2) * 1000000 + elif total_size >= 1000: + y = total_size / (1024) * 1000 + y = round(y) + + tblCount = len(dfT) + colCount = len(dfC_filt) + compatLevel = dfD["Compatibility Level"].iloc[0] + defMode = dfD["Model Default Mode"].iloc[0] + + dfModel = pd.DataFrame( + { + "Dataset Name": dataset, + "Total Size": y, + "Table Count": tblCount, + "Column Count": colCount, + "Compatibility Level": compatLevel, + "Default Mode": defMode, + }, + index=[0], + ) + dfModel.reset_index(drop=True, inplace=True) + export_Model = dfModel.copy() + intList = ["Total Size", "Table Count", "Column Count"] + dfModel[intList] = dfModel[intList].applymap("{:,}".format) + + dataFrames = { + "dfModel": dfModel, + "dfTable": dfTable, + "dfP": dfP, + "colSize": colSize, + "temp": temp, + "dfR": dfR, + "dfH_filt": dfH_filt, + } + + dfs = {} + for fileName, df in dataFrames.items(): + dfs[fileName] = df + + visualize_vertipaq(dfs) + + ### Export vertipaq to delta tables in lakehouse + if export in ["table", "zip"]: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the Vertipaq Analyzer results, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
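The export step above is gated on `lakehouse_attached()`: if no lakehouse is attached to the notebook, the results are only visualized and the export is skipped. A minimal usage sketch, assuming the enclosing function is exposed as `vertipaq_analyzer` with the `dataset` and `export` parameters referenced in the body above; the model name is hypothetical.

```python
from sempy_labs import vertipaq_analyzer  # assumed top-level export
from sempy_labs.lakehouse import lakehouse_attached

# Only request a delta-table export when a lakehouse is attached to the
# notebook; otherwise the analyzer prints a warning and skips the export.
if lakehouse_attached():
    vertipaq_analyzer(dataset="AdventureWorks", export="table")
else:
    vertipaq_analyzer(dataset="AdventureWorks")
```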
+ ) + return + + if export == "table": + spark = SparkSession.builder.getOrCreate() + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) + lakeTName = "vertipaq_analyzer_model" + + lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName] + + query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" + + if len(lakeT_filt) == 0: + runId = 1 + else: + dfSpark = spark.sql(query) + maxRunId = dfSpark.collect()[0][0] + runId = maxRunId + 1 + + dfMap = { + "export_Col": ["Columns", export_Col], + "export_Table": ["Tables", export_Table], + "export_Part": ["Partitions", export_Part], + "export_Rel": ["Relationships", export_Rel], + "export_Hier": ["Hierarchies", export_Hier], + "export_Model": ["Model", export_Model], + } + + print(f"Saving Vertipaq Analyzer to delta tables in the lakehouse...\n") + now = datetime.datetime.now() + for key, (obj, df) in dfMap.items(): + df["Timestamp"] = now + df["Workspace Name"] = workspace + df["Dataset Name"] = dataset + df["RunId"] = runId + + colName = "Workspace Name" + df.insert(0, colName, df.pop(colName)) + colName = "Dataset Name" + df.insert(1, colName, df.pop(colName)) + + df.columns = df.columns.str.replace(" ", "_") + + delta_table_name = f"VertipaqAnalyzer_{obj}".lower() + spark_df = spark.createDataFrame(df) + spark_df.write.mode("append").format("delta").saveAsTable(delta_table_name) + print( + f"\u2022 Vertipaq Analyzer results for '{obj}' have been appended to the '{delta_table_name}' delta table." + ) + + ### Export vertipaq to zip file within the lakehouse + if export == "zip": + dataFrames = { + "dfModel": dfModel, + "dfTable": dfTable, + "dfP": dfP, + "colSize": colSize, + "temp": temp, + "dfR": dfR, + "dfH_filt": dfH_filt, + } + + zipFileName = f"{workspace}.{dataset}.zip" + + folderPath = "/lakehouse/default/Files" + subFolderPath = os.path.join(folderPath, "VertipaqAnalyzer") + ext = ".csv" + if not os.path.exists(subFolderPath): + os.makedirs(subFolderPath, exist_ok=True) + zipFilePath = os.path.join(subFolderPath, zipFileName) + + # Create CSV files based on dataframes + for fileName, df in dataFrames.items(): + filePath = os.path.join(subFolderPath, fileName + ext) + df.to_csv(filePath, index=False) + + # Create a zip file and add CSV files to it + with zipfile.ZipFile(zipFilePath, "w") as zipf: + for fileName in dataFrames: + filePath = os.path.join(subFolderPath, fileName + ext) + zipf.write(filePath, os.path.basename(filePath)) + + # Clean up: remove the individual CSV files + for fileName, df in dataFrames.items(): + filePath = os.path.join(subFolderPath, fileName) + ext + if os.path.exists(filePath): + os.remove(filePath) + print( + f"The Vertipaq Analyzer info for the '{dataset}' semantic model in the '{workspace}' workspace has been saved to the 'Vertipaq Analyzer/{zipFileName}' in the default lakehouse attached to this notebook." 
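For the zip export path, the archive is written to `Files/VertipaqAnalyzer/<workspace>.<dataset>.zip` in the attached lakehouse, per the code above. It can later be reloaded and visualized with `import_vertipaq_analyzer` (defined further down in this file); a sketch with hypothetical workspace and model names:

```python
from sempy_labs import import_vertipaq_analyzer  # assumed top-level export

import_vertipaq_analyzer(
    folder_path="/lakehouse/default/Files/VertipaqAnalyzer",
    file_name="MyWorkspace.AdventureWorks.zip",  # "<workspace>.<dataset>.zip"
)
```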
+ ) + + +def visualize_vertipaq(dataframes): + + # Tooltips for columns within the visual + data = [ + { + "ViewName": "Model", + "ColumnName": "Dataset Name", + "Tooltip": "The name of the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Total Size", + "Tooltip": "The size of the model (in bytes)", + }, + { + "ViewName": "Model", + "ColumnName": "Table Count", + "Tooltip": "The number of tables in the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Column Count", + "Tooltip": "The number of columns in the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Compatibility Level", + "Tooltip": "The compatibility level of the semantic model", + }, + { + "ViewName": "Model", + "ColumnName": "Default Mode", + "Tooltip": "The default query mode of the semantic model", + }, + { + "ViewName": "Table", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + {"ViewName": "Table", "ColumnName": "Type", "Tooltip": "The type of table"}, + { + "ViewName": "Table", + "ColumnName": "Row Count", + "Tooltip": "The number of rows in the table", + }, + { + "ViewName": "Table", + "ColumnName": "Total Size", + "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Data Size", + "Tooltip": "The size of the data for all the columns in this table (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Dictionary Size", + "Tooltip": "The size of the column's dictionary for all columns in this table (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "Hierarchy Size", + "Tooltip": "The size of hierarchy structures for all columns in this table (in bytes)", + }, + { + "ViewName": "Table", + "ColumnName": "% DB", + "Tooltip": "The size of the table relative to the size of the semantic model", + }, + { + "ViewName": "Table", + "ColumnName": "Partitions", + "Tooltip": "The number of partitions in the table", + }, + { + "ViewName": "Table", + "ColumnName": "Columns", + "Tooltip": "The number of columns in the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Partition Name", + "Tooltip": "The name of the partition within the table", + }, + { + "ViewName": "Partition", + "ColumnName": "Mode", + "Tooltip": "The query mode of the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Record Count", + "Tooltip": "The number of rows in the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Segment Count", + "Tooltip": "The number of segments within the partition", + }, + { + "ViewName": "Partition", + "ColumnName": "Records per Segment", + "Tooltip": "The number of rows per segment", + }, + { + "ViewName": "Column", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Column", + "ColumnName": "Column Name", + "Tooltip": "The name of the column", + }, + {"ViewName": "Column", "ColumnName": "Type", "Tooltip": "The type of column"}, + { + "ViewName": "Column", + "ColumnName": "Cardinality", + "Tooltip": "The number of unique rows in the column", + }, + { + "ViewName": "Column", + "ColumnName": "Total Size", + "Tooltip": "Data Size + Dictionary Size + Hierarchy Size (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Data Size", + "Tooltip": "The size of the data for the column (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Dictionary Size", + "Tooltip": "The size of the column's 
dictionary (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "Hierarchy Size", + "Tooltip": "The size of hierarchy structures (in bytes)", + }, + { + "ViewName": "Column", + "ColumnName": "% Table", + "Tooltip": "The size of the column relative to the size of the table", + }, + { + "ViewName": "Column", + "ColumnName": "% DB", + "Tooltip": "The size of the column relative to the size of the semantic model", + }, + { + "ViewName": "Column", + "ColumnName": "Data Type", + "Tooltip": "The data type of the column", + }, + { + "ViewName": "Column", + "ColumnName": "Encoding", + "Tooltip": "The encoding type for the column", + }, + { + "ViewName": "Column", + "ColumnName": "Is Resident", + "Tooltip": "Indicates whether the column is in memory or not", + }, + { + "ViewName": "Column", + "ColumnName": "Temperature", + "Tooltip": "A decimal indicating the frequency and recency of queries against the column", + }, + { + "ViewName": "Column", + "ColumnName": "Last Accessed", + "Tooltip": "The time the column was last queried", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Table Name", + "Tooltip": "The name of the table", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Hierarchy Name", + "Tooltip": "The name of the hierarchy", + }, + { + "ViewName": "Hierarchy", + "ColumnName": "Used Size", + "Tooltip": "The size of user hierarchy structures (in bytes)", + }, + { + "ViewName": "Relationship", + "ColumnName": "From Object", + "Tooltip": "The from table/column in the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "To Object", + "Tooltip": "The to table/column in the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Multiplicity", + "Tooltip": "The cardinality on each side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Used Size", + "Tooltip": "The size of the relationship (in bytes)", + }, + { + "ViewName": "Relationship", + "ColumnName": "Max From Cardinality", + "Tooltip": "The number of unique values in the column used in the from side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Max To Cardinality", + "Tooltip": "The number of unique values in the column used in the to side of the relationship", + }, + { + "ViewName": "Relationship", + "ColumnName": "Missing Rows", + "Tooltip": "The number of rows in the 'from' table which do not map to the key column in the 'to' table", + }, + ] + + # Create DataFrame + tooltipDF = pd.DataFrame(data) + + # define the dictionary with {"Tab name":df} + df_dict = { + "Model Summary": dataframes["dfModel"], + "Tables": dataframes["dfTable"], + "Partitions": dataframes["dfP"], + "Columns (Total Size)": dataframes["colSize"], + "Columns (Temperature)": dataframes["temp"], + "Relationships": dataframes["dfR"], + "Hierarchies": dataframes["dfH_filt"], + } + + mapping = { + "Model Summary": "Model", + "Tables": "Table", + "Partitions": "Partition", + "Columns (Total Size)": "Column", + "Columns (Temperature)": "Column", + "Relationships": "Relationship", + "Hierarchies": "Hierarchy", + } + + # Basic styles for the tabs and tab content + styles = """ + + """ + # JavaScript for tab functionality + script = """ + + """ + + # HTML for tabs + tab_html = '
' + content_html = "" + for i, (title, df) in enumerate(df_dict.items()): + tab_id = f"tab{i}" + tab_html += f'' + + vw = mapping.get(title) + + df_html = df.to_html() + for col in df.columns: + tt = None + try: + tooltipDF_filt = tooltipDF[ + (tooltipDF["ViewName"] == vw) & (tooltipDF["ColumnName"] == col) + ] + tt = tooltipDF_filt["Tooltip"].iloc[0] + except: + pass + df_html = df_html.replace(f"{col}", f'{col}') + content_html += ( + f'
<div id="{tab_id}" class="tabcontent"><h3>{title}</h3>{df_html}</div>' + ) + tab_html += "</div>
" + + # Display the tabs, tab contents, and run the script + display(HTML(styles + tab_html + content_html + script)) + # Default to open the first tab + display( + HTML("") + ) + + +@log +def import_vertipaq_analyzer(folder_path: str, file_name: str): + """ + Imports and visualizes the vertipaq analyzer info from a saved .zip file in your lakehouse. + + Parameters + ---------- + folder_path : str + The folder within your lakehouse in which the .zip file containing the vertipaq analyzer info has been saved. + file_name : str + The file name of the file which contains the vertipaq analyzer info. + + Returns + ------- + str + A visualization of the Vertipaq Analyzer statistics. + """ + + pd.options.mode.copy_on_write = True + + zipFilePath = os.path.join(folder_path, file_name) + extracted_dir = os.path.join(folder_path, "extracted_dataframes") + + with zipfile.ZipFile(zipFilePath, "r") as zip_ref: + zip_ref.extractall(extracted_dir) + + # Read all CSV files into a dictionary of DataFrames + dfs = {} + for file_name in zip_ref.namelist(): + df = pd.read_csv(extracted_dir + "/" + file_name) + dfs[file_name] = df + + visualize_vertipaq(dfs) + + # Clean up: remove the extracted directory + shutil.rmtree(extracted_dir) diff --git a/src/sempy_labs/directlake/__init__.py b/src/sempy_labs/directlake/__init__.py new file mode 100644 index 00000000..d7d66a65 --- /dev/null +++ b/src/sempy_labs/directlake/__init__.py @@ -0,0 +1,45 @@ +from sempy_labs.directlake._directlake_schema_compare import direct_lake_schema_compare +from sempy_labs.directlake._directlake_schema_sync import direct_lake_schema_sync +from sempy_labs.directlake._fallback import ( + check_fallback_reason, +) +from sempy_labs.directlake._get_directlake_lakehouse import get_direct_lake_lakehouse +from sempy_labs.directlake._get_shared_expression import get_shared_expression +from sempy_labs.directlake._guardrails import ( + get_direct_lake_guardrails, + get_sku_size, + get_directlake_guardrails_for_sku, +) +from sempy_labs.directlake._list_directlake_model_calc_tables import ( + list_direct_lake_model_calc_tables, +) +from sempy_labs.directlake._show_unsupported_directlake_objects import ( + show_unsupported_direct_lake_objects, +) +from sempy_labs.directlake._update_directlake_model_lakehouse_connection import ( + update_direct_lake_model_lakehouse_connection, +) +from sempy_labs.directlake._update_directlake_partition_entity import ( + update_direct_lake_partition_entity, +) +from sempy_labs.directlake._warm_cache import ( + warm_direct_lake_cache_isresident, + warm_direct_lake_cache_perspective, +) + +__all__ = [ + "direct_lake_schema_compare", + "direct_lake_schema_sync", + "check_fallback_reason", + "get_direct_lake_lakehouse", + "get_shared_expression", + "get_direct_lake_guardrails", + "get_sku_size", + "get_directlake_guardrails_for_sku", + "list_direct_lake_model_calc_tables", + "show_unsupported_direct_lake_objects", + "update_direct_lake_model_lakehouse_connection", + "update_direct_lake_partition_entity", + "warm_direct_lake_cache_isresident", + "warm_direct_lake_cache_perspective", +] diff --git a/src/sempy_labs/directlake/_directlake_schema_compare.py b/src/sempy_labs/directlake/_directlake_schema_compare.py new file mode 100644 index 00000000..d34ef558 --- /dev/null +++ b/src/sempy_labs/directlake/_directlake_schema_compare.py @@ -0,0 +1,110 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import ( + format_dax_object_name, + resolve_lakehouse_name, + 
get_direct_lake_sql_endpoint, +) +from IPython.display import display +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns +from sempy_labs._list_functions import list_tables +from typing import Optional + + +def direct_lake_schema_compare( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Checks that all the tables in a Direct Lake semantic model map to tables in their corresponding lakehouse and that the columns in each table exist. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + if lakehouse_workspace is None: + lakehouse_workspace = workspace + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] + + if len(dfI_filt) == 0: + print( + f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified." + ) + return + + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print(f"The '{dataset}' semantic model is not in Direct Lake mode.") + return + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + lc = get_lakehouse_columns(lakehouse, lakehouse_workspace) + + dfT.rename(columns={"Type": "Table Type"}, inplace=True) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] + dfC = pd.merge(dfC, dfP[["Table Name", "Query"]], on="Table Name", how="inner") + dfC = pd.merge( + dfC, + dfT[["Name", "Table Type"]], + left_on="Table Name", + right_on="Name", + how="inner", + ) + dfC["Full Column Name"] = format_dax_object_name(dfC["Query"], dfC["Source"]) + dfC_filt = dfC[dfC["Table Type"] == "Table"] + # Schema compare + missingtbls = dfP_filt[~dfP_filt["Query"].isin(lc["Table Name"])] + missingtbls = missingtbls[["Table Name", "Query"]] + missingtbls.rename(columns={"Query": "Source Table"}, inplace=True) + missingcols = dfC_filt[~dfC_filt["Full Column Name"].isin(lc["Full Column Name"])] + missingcols = missingcols[ + ["Table Name", "Column Name", "Type", "Data Type", "Source"] + ] + missingcols.rename(columns={"Source": "Source Column"}, inplace=True) + + if len(missingtbls) == 0: + print( + f"All tables exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." 
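A usage sketch for `direct_lake_schema_compare` as defined above; the model name is hypothetical, and the lakehouse arguments default to the attached lakehouse and its workspace.

```python
from sempy_labs.directlake import direct_lake_schema_compare

# Lists any model tables/columns that do not exist in the lakehouse.
direct_lake_schema_compare(
    dataset="AdventureWorks",   # hypothetical Direct Lake model
    workspace=None,             # resolves to the notebook's workspace
    lakehouse=None,             # resolves to the attached lakehouse
)
```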
+ ) + else: + print( + f"The following tables exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + display(missingtbls) + if len(missingcols) == 0: + print( + f"All columns exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + else: + print( + f"The following columns exist in the '{dataset}' semantic model within the '{workspace}' workspace but do not exist in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + display(missingcols) diff --git a/sempy_labs/DirectLakeSchemaSync.py b/src/sempy_labs/directlake/_directlake_schema_sync.py similarity index 50% rename from sempy_labs/DirectLakeSchemaSync.py rename to src/sempy_labs/directlake/_directlake_schema_sync.py index 8ea4017e..e81961a6 100644 --- a/sempy_labs/DirectLakeSchemaSync.py +++ b/src/sempy_labs/directlake/_directlake_schema_sync.py @@ -1,14 +1,24 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .GetLakehouseColumns import get_lakehouse_columns -from .HelperFunctions import format_dax_object_name, resolve_lakehouse_name, get_direct_lake_sql_endpoint -from typing import List, Optional, Union +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns +from sempy_labs._helper_functions import ( + format_dax_object_name, + resolve_lakehouse_name, + get_direct_lake_sql_endpoint, +) +from typing import Optional from sempy._utils._log import log -@log -def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_to_model: Optional[bool] = False, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +@log +def direct_lake_schema_sync( + dataset: str, + workspace: Optional[str] = None, + add_to_model: Optional[bool] = False, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): """ Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model). @@ -29,17 +39,12 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ sempy.fabric._client._utils._init_analysis_services() import Microsoft.AnalysisServices.Tabular as TOM import System - if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) @@ -53,46 +58,52 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[(dfI['Id'] == sqlEndpointId)] + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[(dfI["Id"] == sqlEndpointId)] if len(dfI_filt) == 0: - print(f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified.") + print( + f"The SQL Endpoint in the '{dataset}' semantic model in the '{workspace} workspace does not point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace as specified." 
+ ) return - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Source Type'] == 'Entity'] - dfC = fabric.list_columns(dataset = dataset, workspace = workspace) - dfC_filt = dfC[dfC['Table Name'].isin(dfP_filt['Table Name'].values)] - dfC_filt = pd.merge(dfC_filt, dfP_filt[['Table Name', 'Query']], on = 'Table Name', how = 'left') - dfC_filt['Column Object'] = format_dax_object_name(dfC_filt['Query'], dfC_filt['Source']) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Source Type"] == "Entity"] + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfC_filt = dfC[dfC["Table Name"].isin(dfP_filt["Table Name"].values)] + dfC_filt = pd.merge( + dfC_filt, dfP_filt[["Table Name", "Query"]], on="Table Name", how="left" + ) + dfC_filt["Column Object"] = format_dax_object_name( + dfC_filt["Query"], dfC_filt["Source"] + ) lc = get_lakehouse_columns(lakehouse, lakehouse_workspace) - lc_filt = lc[lc['Table Name'].isin(dfP_filt['Query'].values)] + lc_filt = lc[lc["Table Name"].isin(dfP_filt["Query"].values)] mapping = { - 'string': 'String', - 'bigint': 'Int64', - 'int': 'Int64', - 'smallint': 'Int64', - 'boolean': 'Boolean', - 'timestamp': 'DateTime', - 'date': 'DateTime', - 'decimal(38,18)': 'Decimal', - 'double': 'Double' + "string": "String", + "bigint": "Int64", + "int": "Int64", + "smallint": "Int64", + "boolean": "Boolean", + "timestamp": "DateTime", + "date": "DateTime", + "decimal(38,18)": "Decimal", + "double": "Double", } tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) m = tom_server.Databases.GetByName(dataset).Model for i, r in lc_filt.iterrows(): - lakeTName = r['Table Name'] - lakeCName = r['Column Name'] - fullColName = r['Full Column Name'] - dType = r['Data Type'] - - if fullColName not in dfC_filt['Column Object'].values: - dfL = dfP_filt[dfP_filt['Query'] == lakeTName] - tName = dfL['Table Name'].iloc[0] + lakeTName = r["Table Name"] + lakeCName = r["Column Name"] + fullColName = r["Full Column Name"] + dType = r["Data Type"] + + if fullColName not in dfC_filt["Column Object"].values: + dfL = dfP_filt[dfP_filt["Query"] == lakeTName] + tName = dfL["Table Name"].iloc[0] if add_to_model: col = TOM.DataColumn() col.Name = lakeCName @@ -101,11 +112,17 @@ def direct_lake_schema_sync(dataset: str, workspace: Optional[str] = None, add_t try: col.DataType = System.Enum.Parse(TOM.DataType, dt) except: - print(f"ERROR: '{dType}' data type is not mapped properly to the semantic model data types.") + print( + f"ERROR: '{dType}' data type is not mapped properly to the semantic model data types." + ) return m.Tables[tName].Columns.Add(col) - print(f"The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' data type within the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The '{lakeCName}' column has been added to the '{tName}' table as a '{dt}' data type within the '{dataset}' semantic model within the '{workspace}' workspace." + ) else: - print(f"The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace.") + print( + f"The {fullColName} column exists in the lakehouse but not in the '{tName}' table in the '{dataset}' semantic model within the '{workspace}' workspace." 
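A usage sketch for `direct_lake_schema_sync` as defined above. With `add_to_model=False` it only reports lakehouse columns missing from the model; with `add_to_model=True` it adds them using the data type mapping shown above (model name hypothetical).

```python
from sempy_labs.directlake import direct_lake_schema_sync

direct_lake_schema_sync(
    dataset="AdventureWorks",   # hypothetical Direct Lake model
    add_to_model=True,          # add missing columns instead of just listing them
)
```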
+ ) m.SaveChanges() diff --git a/sempy_labs/Fallback.py b/src/sempy_labs/directlake/_fallback.py similarity index 55% rename from sempy_labs/Fallback.py rename to src/sempy_labs/directlake/_fallback.py index cad5ee80..38886b6a 100644 --- a/sempy_labs/Fallback.py +++ b/src/sempy_labs/directlake/_fallback.py @@ -3,8 +3,8 @@ import numpy as np from typing import List, Optional, Union -def check_fallback_reason(dataset: str, workspace: Optional[str] = None): +def check_fallback_reason(dataset: str, workspace: Optional[str] = None): """ Shows the reason a table in a Direct Lake semantic model would fallback to DirectQuery. @@ -27,31 +27,36 @@ def check_fallback_reason(dataset: str, workspace: Optional[str] = None): workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] - + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] + if len(dfP_filt) == 0: - print(f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models.") + print( + f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models." + ) else: - df = fabric.evaluate_dax(dataset = dataset,workspace = workspace, - dax_string = - """ + df = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [TableName] AS [Table Name],[FallbackReason] AS [FallbackReasonID] FROM $SYSTEM.TMSCHEMA_DELTA_TABLE_METADATA_STORAGES - """ - ) + """, + ) value_mapping = { - 0: 'No reason for fallback', - 1: 'This table is not framed', - 2: 'This object is a view in the lakehouse', - 3: 'The table does not exist in the lakehouse', - 4: 'Transient error', - 5: 'Using OLS will result in fallback to DQ', - 6: 'Using RLS will result in fallback to DQ' + 0: "No reason for fallback", + 1: "This table is not framed", + 2: "This object is a view in the lakehouse", + 3: "The table does not exist in the lakehouse", + 4: "Transient error", + 5: "Using OLS will result in fallback to DQ", + 6: "Using RLS will result in fallback to DQ", } # Create a new column based on the mapping - df['Fallback Reason Detail'] = np.vectorize(value_mapping.get)(df['FallbackReasonID']) - - return df \ No newline at end of file + df["Fallback Reason Detail"] = np.vectorize(value_mapping.get)( + df["FallbackReasonID"] + ) + + return df diff --git a/sempy_labs/GetDirectLakeLakehouse.py b/src/sempy_labs/directlake/_get_directlake_lakehouse.py similarity index 51% rename from sempy_labs/GetDirectLakeLakehouse.py rename to src/sempy_labs/directlake/_get_directlake_lakehouse.py index 8ef8fb46..2ba51cb3 100644 --- a/sempy_labs/GetDirectLakeLakehouse.py +++ b/src/sempy_labs/directlake/_get_directlake_lakehouse.py @@ -1,11 +1,21 @@ import sempy import sempy.fabric as fabric -from .HelperFunctions import resolve_lakehouse_id, resolve_lakehouse_name, get_direct_lake_sql_endpoint -from typing import List, Optional, Union +from sempy_labs._helper_functions import ( + resolve_lakehouse_id, + resolve_lakehouse_name, + get_direct_lake_sql_endpoint, +) +from typing import Optional, Tuple from uuid import UUID +from sempy_labs._helper_functions import resolve_workspace_name_and_id -def get_direct_lake_lakehouse(dataset: str, workspace: Optional[str] = None, lakehouse: Optional[str] = None, lakehouse_workspace: Optional[str] = None): +def 
get_direct_lake_lakehouse( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +) -> Tuple[str, UUID]: """ Identifies the lakehouse used by a Direct Lake semantic model. @@ -27,38 +37,33 @@ def get_direct_lake_lakehouse(dataset: str, workspace: Optional[str] = None, lak Returns ------- - str, UUID + str, uuid.UUID The lakehouse name and lakehouse ID. - """ + """ - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if lakehouse_workspace is None: lakehouse_workspace = workspace - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: - print(f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode.") - else: - sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) + raise ValueError( + f"ERROR: The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode." + ) - dfI = fabric.list_items(workspace = lakehouse_workspace, type = 'SQLEndpoint') - dfI_filt = dfI[dfI['Id'] == sqlEndpointId] - lakehouseName = dfI_filt['Display Name'].iloc[0] + sqlEndpointId = get_direct_lake_sql_endpoint(dataset, workspace) - lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace) + dfI = fabric.list_items(workspace=lakehouse_workspace, type="SQLEndpoint") + dfI_filt = dfI[dfI["Id"] == sqlEndpointId] + lakehouseName = dfI_filt["Display Name"].iloc[0] - return lakehouseName, lakehouseId - - + lakehouseId = resolve_lakehouse_id(lakehouseName, lakehouse_workspace) + return lakehouseName, lakehouseId diff --git a/src/sempy_labs/directlake/_get_shared_expression.py b/src/sempy_labs/directlake/_get_shared_expression.py new file mode 100644 index 00000000..fd1119bf --- /dev/null +++ b/src/sempy_labs/directlake/_get_shared_expression.py @@ -0,0 +1,59 @@ +import sempy +import sempy.fabric as fabric +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) +from sempy_labs._list_functions import list_lakehouses +from typing import Optional + + +def get_shared_expression( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +): + """ + Dynamically generates the M expression used by a Direct Lake model for a given lakehouse. + + Parameters + ---------- + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str + Shows the expression which can be used to connect a Direct Lake semantic model to its SQL Endpoint. 
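A sketch combining the two functions above: `get_direct_lake_lakehouse` resolves the lakehouse behind a Direct Lake model, and `get_shared_expression` builds the corresponding M expression for its SQL Endpoint (model name hypothetical).

```python
from sempy_labs.directlake import get_direct_lake_lakehouse, get_shared_expression

lakehouse_name, lakehouse_id = get_direct_lake_lakehouse(dataset="AdventureWorks")
expression = get_shared_expression(lakehouse=lakehouse_name)
# expression resembles: let database = Sql.Database("<connection string>", "<endpoint id>") in database
```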
+ """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id) + + dfL = list_lakehouses(workspace=workspace) + lakeDetail = dfL[dfL["Lakehouse Name"] == lakehouse] + + sqlEPCS = lakeDetail["SQL Endpoint Connection String"].iloc[0] + sqlepid = lakeDetail["SQL Endpoint ID"].iloc[0] + provStatus = lakeDetail["SQL Endpoint Provisioning Status"].iloc[0] + + if provStatus == "InProgress": + print( + f"The SQL Endpoint for the '{lakehouse}' lakehouse within the '{workspace}' workspace has not yet been provisioned. Please wait until it has been provisioned." + ) + return + + sh = ( + 'let\n\tdatabase = Sql.Database("' + + sqlEPCS + + '", "' + + sqlepid + + '")\nin\n\tdatabase' + ) + + return sh diff --git a/sempy_labs/Guardrails.py b/src/sempy_labs/directlake/_guardrails.py similarity index 71% rename from sempy_labs/Guardrails.py rename to src/sempy_labs/directlake/_guardrails.py index 3826cb45..1849289b 100644 --- a/sempy_labs/Guardrails.py +++ b/src/sempy_labs/directlake/_guardrails.py @@ -3,8 +3,8 @@ import pandas as pd from typing import List, Optional, Union -def get_direct_lake_guardrails(): +def get_direct_lake_guardrails(): """ Shows the guardrails for when Direct Lake semantic models will fallback to Direct Query based on Microsoft's online documentation. @@ -17,17 +17,17 @@ def get_direct_lake_guardrails(): A table showing the Direct Lake guardrails by SKU. """ - url = 'https://learn.microsoft.com/power-bi/enterprise/directlake-overview' + url = "https://learn.microsoft.com/power-bi/enterprise/directlake-overview" tables = pd.read_html(url) df = tables[0] - df['Fabric SKUs'] = df['Fabric SKUs'].str.split('/') - df = df.explode('Fabric SKUs', ignore_index=True) - + df["Fabric SKUs"] = df["Fabric SKUs"].str.split("/") + df = df.explode("Fabric SKUs", ignore_index=True) + return df -def get_sku_size(workspace: Optional[str] = None): +def get_sku_size(workspace: Optional[str] = None): """ Shows the SKU size for a workspace. @@ -49,15 +49,20 @@ def get_sku_size(workspace: Optional[str] = None): workspace = fabric.resolve_workspace_name(workspace_id) dfC = fabric.list_capacities() - dfW = fabric.list_workspaces().sort_values(by='Name', ascending=True) - dfC.rename(columns={'Id': 'Capacity Id'}, inplace=True) - dfCW = pd.merge(dfW, dfC[['Capacity Id', 'Sku', 'Region', 'State']], on='Capacity Id', how='inner') - sku_value = dfCW.loc[dfCW['Name'] == workspace, 'Sku'].iloc[0] - + dfW = fabric.list_workspaces().sort_values(by="Name", ascending=True) + dfC.rename(columns={"Id": "Capacity Id"}, inplace=True) + dfCW = pd.merge( + dfW, + dfC[["Capacity Id", "Sku", "Region", "State"]], + on="Capacity Id", + how="inner", + ) + sku_value = dfCW.loc[dfCW["Name"] == workspace, "Sku"].iloc[0] + return sku_value -def get_directlake_guardrails_for_sku(sku_size: str): +def get_directlake_guardrails_for_sku(sku_size: str): """ Shows the guardrails for Direct Lake based on the SKU used by your workspace's capacity. 
*Use the result of the 'get_sku_size' function as an input for this function's skuSize parameter.* @@ -65,7 +70,7 @@ def get_directlake_guardrails_for_sku(sku_size: str): Parameters ---------- sku_size : str - Sku size of a workspace/capacity + Sku size of a workspace/capacity Returns ------- @@ -74,6 +79,6 @@ def get_directlake_guardrails_for_sku(sku_size: str): """ df = get_direct_lake_guardrails() - filtered_df = df[df['Fabric SKUs'] == sku_size] - - return filtered_df \ No newline at end of file + filtered_df = df[df["Fabric SKUs"] == sku_size] + + return filtered_df diff --git a/sempy_labs/ListDirectLakeModelCalcTables.py b/src/sempy_labs/directlake/_list_directlake_model_calc_tables.py similarity index 68% rename from sempy_labs/ListDirectLakeModelCalcTables.py rename to src/sempy_labs/directlake/_list_directlake_model_calc_tables.py index 3eac1966..77a0463e 100644 --- a/sempy_labs/ListDirectLakeModelCalcTables.py +++ b/src/sempy_labs/directlake/_list_directlake_model_calc_tables.py @@ -1,13 +1,13 @@ import sempy import sempy.fabric as fabric import pandas as pd -from .ListFunctions import list_tables, list_annotations -from typing import List, Optional, Union +from sempy_labs._list_functions import list_tables, list_annotations +from typing import Optional from sempy._utils._log import log + @log def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = None): - """ Shows the calculated tables and their respective DAX expression for a Direct Lake model (which has been migrated from import/DirectQuery. @@ -30,23 +30,25 @@ def list_direct_lake_model_calc_tables(dataset: str, workspace: Optional[str] = workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - df = pd.DataFrame(columns=['Table Name', 'Source Expression']) + df = pd.DataFrame(columns=["Table Name", "Source Expression"]) - dfP = fabric.list_partitions(dataset = dataset, workspace = workspace) - dfP_filt = dfP[dfP['Mode'] == 'DirectLake'] + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] if len(dfP_filt) == 0: print(f"The '{dataset}' semantic model is not in Direct Lake mode.") else: dfA = list_annotations(dataset, workspace) dfT = list_tables(dataset, workspace) - dfA_filt = dfA[(dfA['Object Type'] == 'Model') & (dfA['Annotation Name'].isin(dfT['Name']))] + dfA_filt = dfA[ + (dfA["Object Type"] == "Model") & (dfA["Annotation Name"].isin(dfT["Name"])) + ] - for i,r in dfA_filt.iterrows(): - tName = r['Annotation Name'] - se = r['Annotation Value'] + for i, r in dfA_filt.iterrows(): + tName = r["Annotation Name"] + se = r["Annotation Value"] - new_data = {'Table Name': tName, 'Source Expression': se} + new_data = {"Table Name": tName, "Source Expression": se} df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/src/sempy_labs/directlake/_show_unsupported_directlake_objects.py b/src/sempy_labs/directlake/_show_unsupported_directlake_objects.py new file mode 100644 index 00000000..5fc88b95 --- /dev/null +++ b/src/sempy_labs/directlake/_show_unsupported_directlake_objects.py @@ -0,0 +1,89 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._list_functions import list_tables +from sempy_labs._helper_functions import format_dax_object_name +from typing import Optional, Tuple + + +def show_unsupported_direct_lake_objects( + dataset: str, workspace: Optional[str] = None +) -> 
Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Returns a list of a semantic model's objects which are not supported by Direct Lake based on `official documentation `_. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame, pandas.DataFrame, pandas.DataFrame + 3 pandas dataframes showing objects in a semantic model which are not supported by Direct Lake. + """ + + pd.options.mode.chained_assignment = None + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + + # Calc tables + dfT_filt = dfT[dfT["Type"] == "Calculated Table"] + dfT_filt.rename(columns={"Name": "Table Name"}, inplace=True) + t = dfT_filt[["Table Name", "Type"]] + + # Calc columns + dfC_filt = dfC[(dfC["Type"] == "Calculated") | (dfC["Data Type"] == "Binary")] + c = dfC_filt[["Table Name", "Column Name", "Type", "Data Type", "Source"]] + + # Relationships + dfC["Column Object"] = format_dax_object_name(dfC["Table Name"], dfC["Column Name"]) + dfR["From Object"] = format_dax_object_name(dfR["From Table"], dfR["From Column"]) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + merged_from = pd.merge( + dfR, dfC, left_on="From Object", right_on="Column Object", how="left" + ) + merged_to = pd.merge( + dfR, dfC, left_on="To Object", right_on="Column Object", how="left" + ) + + dfR["From Column Data Type"] = merged_from["Data Type"] + dfR["To Column Data Type"] = merged_to["Data Type"] + + dfR_filt = dfR[ + ( + (dfR["From Column Data Type"] == "DateTime") + | (dfR["To Column Data Type"] == "DateTime") + ) + | (dfR["From Column Data Type"] != dfR["To Column Data Type"]) + ] + r = dfR_filt[ + [ + "From Table", + "From Column", + "To Table", + "To Column", + "From Column Data Type", + "To Column Data Type", + ] + ] + + # print('Calculated Tables are not supported...') + # display(t) + # print("Learn more about Direct Lake limitations here: https://learn.microsoft.com/power-bi/enterprise/directlake-overview#known-issues-and-limitations") + # print('Calculated columns are not supported. 
Columns of binary data type are not supported.') + # display(c) + # print('Columns used for relationship cannot be of data type datetime and they also must be of the same data type.') + # display(r) + + return t, c, r diff --git a/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py b/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py new file mode 100644 index 00000000..98391029 --- /dev/null +++ b/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py @@ -0,0 +1,81 @@ +import sempy +import sempy.fabric as fabric +from sempy_labs.directlake._get_shared_expression import get_shared_expression +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) +from sempy_labs._tom import connect_semantic_model +from typing import List, Optional, Union + + +def update_direct_lake_model_lakehouse_connection( + dataset: str, + workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name in which the semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if lakehouse_workspace == None: + lakehouse_workspace = workspace + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + # Check if lakehouse is valid + dfI = fabric.list_items(workspace=lakehouse_workspace, type="Lakehouse") + dfI_filt = dfI[(dfI["Display Name"] == lakehouse)] + + if len(dfI_filt) == 0: + print( + f"The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. Therefore it cannot be used to support the '{dataset}' semantic model within the '{workspace}' workspace." + ) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[dfP["Mode"] == "DirectLake"] + + if len(dfP_filt) == 0: + print( + f"The '{dataset}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models." + ) + else: + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: + + shEx = get_shared_expression(lakehouse, lakehouse_workspace) + try: + tom.model.Expressions["DatabaseQuery"].Expression = shEx + print( + f"The expression in the '{dataset}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace." + ) + except: + print( + f"ERROR: The expression in the '{dataset}' semantic model was not updated." 
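A usage sketch for `update_direct_lake_model_lakehouse_connection` as defined above, repointing a model's `DatabaseQuery` expression to a different lakehouse (all names hypothetical).

```python
from sempy_labs.directlake import update_direct_lake_model_lakehouse_connection

update_direct_lake_model_lakehouse_connection(
    dataset="AdventureWorks",            # hypothetical Direct Lake model
    lakehouse="SalesLakehouse",          # hypothetical target lakehouse
    lakehouse_workspace="MyWorkspace",   # hypothetical workspace of that lakehouse
)
```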
+ ) diff --git a/sempy_labs/UpdateDirectLakePartitionEntity.py b/src/sempy_labs/directlake/_update_directlake_partition_entity.py similarity index 52% rename from sempy_labs/UpdateDirectLakePartitionEntity.py rename to src/sempy_labs/directlake/_update_directlake_partition_entity.py index 35561abc..d710b146 100644 --- a/sempy_labs/UpdateDirectLakePartitionEntity.py +++ b/src/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -1,10 +1,14 @@ -import sempy import sempy.fabric as fabric -from .TOM import connect_semantic_model +from sempy_labs._tom import connect_semantic_model from typing import List, Optional, Union -def update_direct_lake_partition_entity(dataset: str, table_name: Union[str, List[str]], entity_name: Union[str, List[str]], workspace: Optional[str] = None): +def update_direct_lake_partition_entity( + dataset: str, + table_name: Union[str, List[str]], + entity_name: Union[str, List[str]], + workspace: Optional[str] = None, +): """ Remaps a table (or tables) in a Direct Lake semantic model to a table in a lakehouse. @@ -20,43 +24,41 @@ def update_direct_lake_partition_entity(dataset: str, table_name: Union[str, Lis The Fabric workspace name in which the semantic model exists. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + """ - Returns - ------- - - """ - - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) # Support both str & list types if isinstance(table_name, str): table_name = [table_name] if isinstance(entity_name, str): entity_name = [entity_name] - + if len(table_name) != len(entity_name): - print(f"ERROR: The 'table_name' and 'entity_name' arrays must be of equal length.") + print( + f"ERROR: The 'table_name' and 'entity_name' arrays must be of equal length." + ) return - - with connect_semantic_model(dataset=dataset, readonly=False, workspace=workspace) as tom: + + with connect_semantic_model( + dataset=dataset, readonly=False, workspace=workspace + ) as tom: if not tom.is_direct_lake(): - print(f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode.") + print( + f"The '{dataset}' semantic model within the '{workspace}' workspace is not in Direct Lake mode." + ) return for tName in table_name: i = table_name.index(tName) eName = entity_name[i] try: - tom.model.Tables[tName].Partitions[0].EntityName = eName - print(f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace.") + tom.model.Tables[tName].Partitions[0].EntityName = eName + print( + f"The '{tName}' table in the '{dataset}' semantic model has been updated to point to the '{eName}' table in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) except: - print(f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated.") - - - - + print( + f"ERROR: The '{tName}' table in the '{dataset}' semantic model has not been updated." 
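A usage sketch for `update_direct_lake_partition_entity` as defined above; the two lists must be the same length, pairing each model table with its lakehouse table (names hypothetical).

```python
from sempy_labs.directlake import update_direct_lake_partition_entity

update_direct_lake_partition_entity(
    dataset="AdventureWorks",                  # hypothetical Direct Lake model
    table_name=["DimDate", "DimProduct"],      # tables in the semantic model
    entity_name=["dim_date", "dim_product"],   # matching lakehouse tables
)
```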
+ ) diff --git a/src/sempy_labs/directlake/_warm_cache.py b/src/sempy_labs/directlake/_warm_cache.py new file mode 100644 index 00000000..d995b437 --- /dev/null +++ b/src/sempy_labs/directlake/_warm_cache.py @@ -0,0 +1,210 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from tqdm.auto import tqdm +import numpy as np +import time +from sempy_labs._helper_functions import format_dax_object_name +from sempy_labs._refresh_semantic_model import refresh_semantic_model +from sempy_labs._model_dependencies import get_measure_dependencies +from typing import Optional +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def warm_direct_lake_cache_perspective( + dataset: str, + perspective: str, + add_dependencies: Optional[bool] = False, + workspace: Optional[str] = None, +): + """ + Warms the cache of a Direct Lake semantic model by running a simple DAX query against the columns in a perspective. + + Parameters + ---------- + dataset : str + Name of the semantic model. + perspective : str + Name of the perspective which contains objects to be used for warming the cache. + add_dependencies : bool, default=False + Includes object dependencies in the cache warming process. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + Returns a pandas dataframe showing the columns that have been put into memory. + """ + + workspace = fabric.resolve_workspace_name(workspace) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"{icons.red_dot} The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode." + ) + return + + dfPersp = fabric.list_perspectives(dataset=dataset, workspace=workspace) + dfPersp["DAX Object Name"] = format_dax_object_name( + dfPersp["Table Name"], dfPersp["Object Name"] + ) + dfPersp_filt = dfPersp[dfPersp["Perspective Name"] == perspective] + + if len(dfPersp_filt) == 0: + print( + f"{icons.red_dot} The '{perspective} perspective does not exist or contains no objects within the '{dataset}' semantic model in the '{workspace}' workspace." 
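A usage sketch for `warm_direct_lake_cache_perspective` as defined above; with `add_dependencies=True`, columns referenced by the perspective's measures, hierarchies and relationships are warmed as well (names hypothetical).

```python
from sempy_labs.directlake import warm_direct_lake_cache_perspective

warmed = warm_direct_lake_cache_perspective(
    dataset="AdventureWorks",     # hypothetical Direct Lake model
    perspective="KeyReports",     # hypothetical perspective
    add_dependencies=True,
)
```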
+ ) + return + dfPersp_c = dfPersp_filt[dfPersp_filt["Object Type"] == "Column"] + + column_values = dfPersp_c["DAX Object Name"].tolist() + + if add_dependencies: + # Measure dependencies + md = get_measure_dependencies(dataset, workspace) + md["Referenced Full Object"] = format_dax_object_name( + md["Referenced Table"], md["Referenced Object"] + ) + dfPersp_m = dfPersp_filt[(dfPersp_filt["Object Type"] == "Measure")] + md_filt = md[ + (md["Object Name"].isin(dfPersp_m["Object Name"].values)) + & (md["Referenced Object Type"] == "Column") + ] + measureDep = md_filt["Referenced Full Object"].unique() + + # Hierarchy dependencies + dfPersp_h = dfPersp_filt[(dfPersp_filt["Object Type"] == "Hierarchy")] + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfH["Hierarchy Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Hierarchy Name"] + ) + dfH["Column Object"] = format_dax_object_name( + dfH["Table Name"], dfH["Column Name"] + ) + dfH_filt = dfH[ + dfH["Hierarchy Object"].isin(dfPersp_h["DAX Object Name"].values) + ] + hierarchyDep = dfH_filt["Column Object"].unique() + + # Relationship dependencies + unique_table_names = dfPersp_filt["Table Name"].unique() + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + dfR["From Object"] = format_dax_object_name( + dfR["From Table"], dfR["From Column"] + ) + dfR["To Object"] = format_dax_object_name(dfR["To Table"], dfR["To Column"]) + filtered_dfR = dfR[ + dfR["From Table"].isin(unique_table_names) + & dfR["To Table"].isin(unique_table_names) + ] + + fromObjects = filtered_dfR["From Object"].unique() + toObjects = filtered_dfR["To Object"].unique() + + merged_list = np.concatenate( + [column_values, measureDep, hierarchyDep, fromObjects, toObjects] + ) + merged_list_unique = list(set(merged_list)) + + else: + merged_list_unique = column_values + + df = pd.DataFrame(merged_list_unique, columns=["DAX Object Name"]) + df[["Table Name", "Column Name"]] = df["DAX Object Name"].str.split( + "[", expand=True + ) + df["Table Name"] = df["Table Name"].str[1:-1] + df["Column Name"] = df["Column Name"].str[0:-1] + + tbls = list(set(value.split("[")[0] for value in merged_list_unique)) + + for tableName in (bar := tqdm(tbls)): + filtered_list = [ + value for value in merged_list_unique if value.startswith(f"{tableName}[") + ] + bar.set_description(f"Warming the '{tableName}' table...") + css = ",".join(map(str, filtered_list)) + dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" "" + x = fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace) + + print(f"{icons.green_dot} The following columns have been put into memory:") + + new_column_order = ["Table Name", "Column Name", "DAX Object Name"] + df = df.reindex(columns=new_column_order) + df = df[["Table Name", "Column Name"]].sort_values( + by=["Table Name", "Column Name"], ascending=True + ) + + return df + + +@log +def warm_direct_lake_cache_isresident( + dataset: str, workspace: Optional[str] = None +) -> pd.DataFrame: + """ + Performs a refresh on the semantic model and puts the columns which were in memory prior to the refresh back into memory. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
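A usage sketch for `warm_direct_lake_cache_isresident` as defined below, which refreshes the model and then re-warms the columns that were resident before the refresh (model name hypothetical).

```python
from sempy_labs.directlake import warm_direct_lake_cache_isresident

resident_columns = warm_direct_lake_cache_isresident(dataset="AdventureWorks")
```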
+ + Returns + ------- + pandas.DataFrame + Returns a pandas dataframe showing the columns that have been put into memory. + """ + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + if not any(r["Mode"] == "DirectLake" for i, r in dfP.iterrows()): + print( + f"The '{dataset}' semantic model in the '{workspace}' workspace is not in Direct Lake mode. This function is specifically for semantic models in Direct Lake mode." + ) + return + + # Identify columns which are currently in memory (Is Resident = True) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace, extended=True) + dfC["DAX Object Name"] = format_dax_object_name( + dfC["Table Name"], dfC["Column Name"] + ) + dfC_filtered = dfC[dfC["Is Resident"]] + + if len(dfC_filtered) == 0: + print( + f"{icons.yellow_dot} At present, no columns are in memory in the '{dataset}' semantic model in the '{workspace}' workspace." + ) + return + + # Refresh/frame dataset + refresh_semantic_model(dataset=dataset, refresh_type="full", workspace=workspace) + + time.sleep(2) + + tbls = dfC_filtered["Table Name"].unique() + column_values = dfC_filtered["DAX Object Name"].tolist() + + # Run basic query to get columns into memory; completed one table at a time (so as not to overload the capacity) + for tableName in (bar := tqdm(tbls)): + bar.set_description(f"Warming the '{tableName}' table...") + css = ",".join(map(str, column_values)) + dax = """EVALUATE TOPN(1,SUMMARIZECOLUMNS(""" + css + "))" "" + x = fabric.evaluate_dax(dataset=dataset, dax_string=dax, workspace=workspace) + + print( + f"{icons.green_dot} The following columns have been put into memory. Temperature indicates the column temperature prior to the semantic model refresh." + ) + + return dfC_filtered[ + ["Table Name", "Column Name", "Is Resident", "Temperature"] + ].sort_values(by=["Table Name", "Column Name"], ascending=True) diff --git a/src/sempy_labs/lakehouse/__init__.py b/src/sempy_labs/lakehouse/__init__.py new file mode 100644 index 00000000..9db8e01a --- /dev/null +++ b/src/sempy_labs/lakehouse/__init__.py @@ -0,0 +1,24 @@ +from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._lakehouse import ( + lakehouse_attached, + optimize_lakehouse_tables, +) + +from sempy_labs.lakehouse._shortcuts import ( + list_shortcuts, + # create_shortcut, + create_shortcut_onelake, + delete_shortcut, +) + +__all__ = [ + "get_lakehouse_columns", + "get_lakehouse_tables", + "lakehouse_attached", + "optimize_lakehouse_tables", + "list_shortcuts", + # create_shortcut, + "create_shortcut_onelake", + "delete_shortcut", +] diff --git a/sempy_labs/GetLakehouseColumns.py b/src/sempy_labs/lakehouse/_get_lakehouse_columns.py similarity index 53% rename from sempy_labs/GetLakehouseColumns.py rename to src/sempy_labs/lakehouse/_get_lakehouse_columns.py index 56807281..cfb3d387 100644 --- a/sempy_labs/GetLakehouseColumns.py +++ b/src/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -2,13 +2,17 @@ import sempy.fabric as fabric import pandas as pd from pyspark.sql import SparkSession -from delta import DeltaTable -from .HelperFunctions import resolve_lakehouse_name, format_dax_object_name, resolve_lakehouse_id -from .GetLakehouseTables import get_lakehouse_tables -from typing import List, Optional, Union +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + format_dax_object_name, + resolve_lakehouse_id, +) +from typing import Optional -def 
get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def get_lakehouse_columns( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +): """ Shows the tables and columns of a lakehouse and their respective properties. @@ -27,14 +31,21 @@ def get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[s pandas.DataFrame Shows the tables/columns within a lakehouse and their properties. """ + from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables + from delta import DeltaTable - df = pd.DataFrame(columns=['Workspace Name', 'Lakehouse Name', 'Table Name', 'Column Name', 'Full Column Name', 'Data Type']) + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Lakehouse Name", + "Table Name", + "Column Name", + "Full Column Name", + "Data Type", + ] + ) - if workspace == None: - workspace_id = fabric.get_workspace_id() - workspace = fabric.resolve_workspace_name(workspace_id) - else: - workspace_id = fabric.resolve_workspace_id(workspace) + workspace = fabric.resolve_workspace_name(workspace) if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() @@ -44,18 +55,27 @@ def get_lakehouse_columns(lakehouse: Optional[str] = None, workspace: Optional[s spark = SparkSession.builder.getOrCreate() - tables = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace, extended = False, count_rows = False) - tables_filt = tables[tables['Format'] == 'delta'] + tables = get_lakehouse_tables( + lakehouse=lakehouse, workspace=workspace, extended=False, count_rows=False + ) + tables_filt = tables[tables["Format"] == "delta"] for i, r in tables_filt.iterrows(): - tName = r['Table Name'] - tPath = r['Location'] + tName = r["Table Name"] + tPath = r["Location"] delta_table = DeltaTable.forPath(spark, tPath) sparkdf = delta_table.toDF() for cName, data_type in sparkdf.dtypes: tc = format_dax_object_name(tName, cName) - new_data = {'Workspace Name': workspace, 'Lakehouse Name': lakehouse, 'Table Name': tName, 'Column Name': cName, 'Full Column Name': tc, 'Data Type': data_type} + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Column Name": cName, + "Full Column Name": tc, + "Data Type": data_type, + } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - return df \ No newline at end of file + return df diff --git a/src/sempy_labs/lakehouse/_get_lakehouse_tables.py b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py new file mode 100644 index 00000000..f80b3e18 --- /dev/null +++ b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -0,0 +1,250 @@ +import sempy.fabric as fabric +import pandas as pd +from pyspark.sql import SparkSession +import pyarrow.parquet as pq +import datetime +from sempy_labs._helper_functions import ( + resolve_lakehouse_id, + resolve_lakehouse_name, + resolve_workspace_name_and_id, +) +from sempy_labs.directlake._guardrails import ( + get_sku_size, + get_directlake_guardrails_for_sku, +) +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from typing import Optional + + +def get_lakehouse_tables( + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, + extended: Optional[bool] = False, + count_rows: Optional[bool] = False, + export: Optional[bool] = False, +): + """ + Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. + + Parameters + ---------- + lakehouse : str, default=None + The Fabric lakehouse. 
+ Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + extended : bool, default=False + Obtains additional columns relevant to the size of each table. + count_rows : bool, default=False + Obtains a row count for each lakehouse table. + export : bool, default=False + Exports the resulting dataframe to a delta table in the lakehouse. + + Returns + ------- + pandas.DataFrame + Shows the tables/columns within a lakehouse and their properties. + """ + + df = pd.DataFrame( + columns=[ + "Workspace Name", + "Lakehouse Name", + "Table Name", + "Format", + "Type", + "Location", + ] + ) + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + + if count_rows: # Setting countrows defaults to extended=True + extended = True + + client = fabric.FabricRestClient() + response = client.get( + f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables" + ) + + for i in response.json()["data"]: + tName = i["name"] + tType = i["type"] + tFormat = i["format"] + tLocation = i["location"] + if extended == False: + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + else: + sku_value = get_sku_size(workspace) + guardrail = get_directlake_guardrails_for_sku(sku_value) + + spark = SparkSession.builder.getOrCreate() + + intColumns = ["Files", "Row Groups", "Table Size"] + if tType == "Managed" and tFormat == "delta": + detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0] + num_files = detail_df.numFiles + size_in_bytes = detail_df.sizeInBytes + + delta_table_path = f"Tables/{tName}" + latest_files = ( + spark.read.format("delta").load(delta_table_path).inputFiles() + ) + file_paths = [f.split("/")[-1] for f in latest_files] + + # Handle FileNotFoundError + num_rowgroups = 0 + for filename in file_paths: + try: + num_rowgroups += pq.ParquetFile( + f"/lakehouse/default/{delta_table_path}/{filename}" + ).num_row_groups + except FileNotFoundError: + continue + + if count_rows: + num_rows = spark.table(tName).count() + intColumns.append("Row Count") + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + "Files": num_files, + "Row Groups": num_rowgroups, + "Row Count": num_rows, + "Table Size": size_in_bytes, + } + else: + new_data = { + "Workspace Name": workspace, + "Lakehouse Name": lakehouse, + "Table Name": tName, + "Format": tFormat, + "Type": tType, + "Location": tLocation, + "Files": num_files, + "Row Groups": num_rowgroups, + "Table Size": size_in_bytes, + } + + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + df[intColumns] = df[intColumns].astype(int) + + df["SKU"] = guardrail["Fabric SKUs"].iloc[0] + df["Parquet File Guardrail"] = guardrail["Parquet files per table"].iloc[0] + df["Row Group Guardrail"] = guardrail["Row groups per table"].iloc[0] + df["Row Count Guardrail"] = ( + 
guardrail["Rows per table (millions)"].iloc[0] * 1000000 + ) + + df["Parquet File Guardrail Hit"] = ( + df["Files"] > df["Parquet File Guardrail"] + ) + df["Row Group Guardrail Hit"] = df["Row Groups"] > df["Row Group Guardrail"] + + if count_rows: + df["Row Count Guardrail Hit"] = ( + df["Row Count"] > df["Row Count Guardrail"] + ) + + if export: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + spark = SparkSession.builder.getOrCreate() + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name( + lakehouse_id=lakehouse_id, workspace=workspace + ) + lakeTName = "lakehouse_table_details" + lakeT_filt = df[df["Table Name"] == lakeTName] + + query = f"SELECT MAX(RunId) FROM {lakehouse}.{lakeTName}" + + if len(lakeT_filt) == 0: + runId = 1 + else: + dfSpark = spark.sql(query) + maxRunId = dfSpark.collect()[0][0] + runId = maxRunId + 1 + + export_df = df.copy() + + cols = [ + "Files", + "Row Groups", + "Row Count", + "Table Size", + "SKU", + "Parquet File Guardrail", + "Row Group Guardrail", + "Row Count Guardrail", + "Parquet File Guardrail Hit", + "Row Group Guardrail Hit", + "Row Count Guardrail Hit", + ] + + for c in cols: + if c not in export_df: + if c in [ + "Files", + "Row Groups", + "Row Count", + "Table Size", + "Parquet File Guardrail", + "Row Group Guardrail", + "Row Count Guardrail", + ]: + export_df[c] = 0 + export_df[c] = export_df[c].astype(int) + elif c in ["SKU"]: + export_df[c] = None + export_df[c] = export_df[c].astype(str) + elif c in [ + "Parquet File Guardrail Hit", + "Row Group Guardrail Hit", + "Row Count Guardrail Hit", + ]: + export_df[c] = False + export_df[c] = export_df[c].astype(bool) + + print( + f"Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n" + ) + now = datetime.datetime.now() + export_df["Timestamp"] = now + export_df["RunId"] = runId + + export_df.columns = export_df.columns.str.replace(" ", "_") + spark_df = spark.createDataFrame(export_df) + spark_df.write.mode("append").format("delta").saveAsTable(lakeTName) + print( + f"\u2022 Lakehouse table properties have been saved to the '{lakeTName}' delta table." + ) + + return df diff --git a/sempy_labs/Lakehouse.py b/src/sempy_labs/lakehouse/_lakehouse.py similarity index 63% rename from sempy_labs/Lakehouse.py rename to src/sempy_labs/lakehouse/_lakehouse.py index eb65e010..50e55c40 100644 --- a/sempy_labs/Lakehouse.py +++ b/src/sempy_labs/lakehouse/_lakehouse.py @@ -2,36 +2,36 @@ import sempy.fabric as fabric from tqdm.auto import tqdm from pyspark.sql import SparkSession -from delta import DeltaTable -from .HelperFunctions import resolve_lakehouse_name +from sempy_labs._helper_functions import resolve_lakehouse_name from typing import List, Optional, Union -def lakehouse_attached() -> bool: +def lakehouse_attached() -> bool: """ Identifies if a lakehouse is attached to the notebook. - Parameters - ---------- - Returns ------- bool Returns True if a lakehouse is attached to the notebook. 
- """ + """ spark = SparkSession.builder.getOrCreate() - lakeId = spark.conf.get('trident.lakehouse.id') - + lakeId = spark.conf.get("trident.lakehouse.id") + if len(lakeId) > 0: return True else: return False -def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, lakehouse: Optional[str] = None, workspace: Optional[str] = None): +def optimize_lakehouse_tables( + tables: Optional[Union[str, List[str]]] = None, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): """ - Runs the [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) function over the specified lakehouse tables. + Runs the `OPTIMIZE `_ function over the specified lakehouse tables. Parameters ---------- @@ -44,30 +44,27 @@ def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, la The Fabric workspace used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - """ - from .GetLakehouseTables import get_lakehouse_tables + from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables + from delta import DeltaTable if workspace == None: workspace_id = fabric.get_workspace_id() workspace = fabric.resolve_workspace_name(workspace_id) - + if lakehouse == None: lakehouse_id = fabric.get_lakehouse_id() lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) - lakeTables = get_lakehouse_tables(lakehouse = lakehouse, workspace = workspace) - lakeTablesDelta = lakeTables[lakeTables['Format'] == 'delta'] + lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"] if isinstance(tables, str): tables = [tables] if tables is not None: - tables_filt = lakeTablesDelta[lakeTablesDelta['Table Name'].isin(tables)] + tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)] else: tables_filt = lakeTablesDelta.copy() @@ -75,12 +72,14 @@ def optimize_lakehouse_tables(tables: Optional[Union[str, List[str]]] = None, la spark = SparkSession.builder.getOrCreate() - i=1 - for index, r in (bar := tqdm(tables_filt.iterrows())): - tableName = r['Table Name'] - tablePath = r['Location'] + i = 1 + for _, r in (bar := tqdm(tables_filt.iterrows())): + tableName = r["Table Name"] + tablePath = r["Location"] bar.set_description(f"Optimizing the '{tableName}' table...") deltaTable = DeltaTable.forPath(spark, tablePath) deltaTable.optimize().executeCompaction() - print(f"The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})") - i+=1 + print( + f"The '{tableName}' table has been optimized. ({str(i)}/{str(tableCount)})" + ) + i += 1 diff --git a/src/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py new file mode 100644 index 00000000..a02073ed --- /dev/null +++ b/src/sempy_labs/lakehouse/_shortcuts.py @@ -0,0 +1,296 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_lakehouse_id, + resolve_workspace_name_and_id, +) +from typing import List, Optional, Union +import sempy_labs._icons as icons + + +def create_shortcut_onelake( + table_name: str, + source_lakehouse: str, + source_workspace: str, + destination_lakehouse: str, + destination_workspace: Optional[str] = None, + shortcut_name: Optional[str] = None, +): + """ + Creates a `shortcut `_ to a delta table in OneLake. 
+ + Parameters + ---------- + table_name : str + The table name for which a shortcut will be created. + source_lakehouse : str + The Fabric lakehouse in which the table resides. + source_workspace : str + The name of the Fabric workspace in which the source lakehouse exists. + destination_lakehouse : str + The Fabric lakehouse in which the shortcut will be created. + destination_workspace : str, default=None + The name of the Fabric workspace in which the shortcut will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + shortcut_name : str, default=None + The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. + """ + + sourceWorkspaceId = fabric.resolve_workspace_id(source_workspace) + sourceLakehouseId = resolve_lakehouse_id(source_lakehouse, source_workspace) + + if destination_workspace == None: + destination_workspace = source_workspace + + destinationWorkspaceId = fabric.resolve_workspace_id(destination_workspace) + destinationLakehouseId = resolve_lakehouse_id( + destination_lakehouse, destination_workspace + ) + + if shortcut_name == None: + shortcut_name = table_name + + client = fabric.FabricRestClient() + tablePath = "Tables/" + table_name + + request_body = { + "path": "Tables", + "name": shortcut_name.replace(" ", ""), + "target": { + "oneLake": { + "workspaceId": sourceWorkspaceId, + "itemId": sourceLakehouseId, + "path": tablePath, + } + }, + } + + try: + response = client.post( + f"/v1/workspaces/{destinationWorkspaceId}/items/{destinationLakehouseId}/shortcuts", + json=request_body, + ) + if response.status_code == 201: + print( + f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse}' lakehouse within the '{destination_workspace} workspace. It is based on the '{table_name}' table in the '{source_lakehouse}' lakehouse within the '{source_workspace}' workspace." + ) + else: + print(response.status_code) + except Exception as e: + print( + f"{icons.red_dot} Failed to create a shortcut for the '{table_name}' table: {e}" + ) + + +def create_shortcut( + shortcut_name: str, + location: str, + subpath: str, + source: str, + connection_id: str, + lakehouse: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Creates a `shortcut `_ to an ADLS Gen2 or Amazon S3 source. + + Parameters + ---------- + shortcut_name : str + location : str + subpath : str + source : str + connection_id: str + lakehouse : str + The Fabric lakehouse in which the shortcut will be created. + workspace : str, default=None + The name of the Fabric workspace in which the shortcut will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + source_titles = {"adlsGen2": "ADLS Gen2", "amazonS3": "Amazon S3"} + + sourceValues = list(source_titles.keys()) + + if source not in sourceValues: + print( + f"{icons.red_dot} The 'source' parameter must be one of these values: {sourceValues}." 
+ ) + return + + sourceTitle = source_titles[source] + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + + client = fabric.FabricRestClient() + shortcutActualName = shortcut_name.replace(" ", "") + + request_body = { + "path": "Tables", + "name": shortcutActualName, + "target": { + source: { + "location": location, + "subpath": subpath, + "connectionId": connection_id, + } + }, + } + + try: + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts", + json=request_body, + ) + if response.status_code == 201: + print( + f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'." + ) + else: + print(response.status_code) + except: + print( + f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table." + ) + + +def list_shortcuts( + lakehouse: Optional[str] = None, workspace: Optional[str] = None +) -> pd.DataFrame: + """ + Shows all shortcuts which exist in a Fabric lakehouse. + + Parameters + ---------- + lakehouse : str, default=None + The Fabric lakehouse name. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str, default=None + The name of the Fabric workspace in which lakehouse resides. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing all the shortcuts which exist in the specified lakehouse. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + + df = pd.DataFrame( + columns=[ + "Shortcut Name", + "Shortcut Path", + "Source", + "Source Lakehouse Name", + "Source Workspace Name", + "Source Path", + "Source Connection ID", + "Source Location", + "Source SubPath", + ] + ) + + client = fabric.FabricRestClient() + response = client.get( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts" + ) + if response.status_code == 200: + for s in response.json()["value"]: + shortcutName = s["name"] + shortcutPath = s["path"] + source = list(s["target"].keys())[0] + ( + sourceLakehouseName, + sourceWorkspaceName, + sourcePath, + connectionId, + location, + subpath, + ) = (None, None, None, None, None, None) + if source == "oneLake": + sourceLakehouseId = s["target"][source]["itemId"] + sourcePath = s["target"][source]["path"] + sourceWorkspaceId = s["target"][source]["workspaceId"] + sourceWorkspaceName = fabric.resolve_workspace_name(sourceWorkspaceId) + sourceLakehouseName = resolve_lakehouse_name( + sourceLakehouseId, sourceWorkspaceName + ) + else: + connectionId = s["target"][source]["connectionId"] + location = s["target"][source]["location"] + subpath = s["target"][source]["subpath"] + + new_data = { + "Shortcut Name": shortcutName, + "Shortcut Path": shortcutPath, + "Source": source, + "Source Lakehouse Name": sourceLakehouseName, + "Source Workspace Name": sourceWorkspaceName, + "Source Path": sourcePath, + "Source Connection ID": connectionId, + "Source Location": location, + "Source SubPath": subpath, + } + df 
= pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + print( + f"This function relies on an API which is not yet official as of May 21, 2024. Once the API becomes official this function will work as expected." + ) + return df + + +def delete_shortcut( + shortcut_name: str, lakehouse: Optional[str] = None, workspace: Optional[str] = None +): + """ + Deletes a shortcut. + + Parameters + ---------- + shortcut_name : str + The name of the shortcut. + lakehouse : str, default=None + The Fabric lakehouse name in which the shortcut resides. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str, default=None + The name of the Fabric workspace in which lakehouse resides. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + + client = fabric.FabricRestClient() + response = client.delete( + f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/Tables/{shortcut_name}" + ) + + if response.status_code == 200: + print( + f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace}' workspace has been deleted." + ) + else: + print(f"{icons.red_dot} The '{shortcut_name}' has not been deleted.") diff --git a/src/sempy_labs/migration/__init__.py b/src/sempy_labs/migration/__init__.py new file mode 100644 index 00000000..60f78892 --- /dev/null +++ b/src/sempy_labs/migration/__init__.py @@ -0,0 +1,29 @@ +from sempy_labs.migration._create_pqt_file import create_pqt_file +from sempy_labs.migration._migrate_calctables_to_lakehouse import ( + migrate_calc_tables_to_lakehouse, + migrate_field_parameters, +) +from sempy_labs.migration._migrate_calctables_to_semantic_model import ( + migrate_calc_tables_to_semantic_model, +) +from sempy_labs.migration._migrate_model_objects_to_semantic_model import ( + migrate_model_objects_to_semantic_model, +) +from sempy_labs.migration._migrate_tables_columns_to_semantic_model import ( + migrate_tables_columns_to_semantic_model, +) +from sempy_labs.migration._migration_validation import ( + migration_validation, + # list_semantic_model_objects +) + +__all__ = [ + "create_pqt_file", + "migrate_calc_tables_to_lakehouse", + "migrate_field_parameters", + "migrate_calc_tables_to_semantic_model", + "migrate_model_objects_to_semantic_model", + "migrate_tables_columns_to_semantic_model", + "migration_validation", + # list_semantic_model_objects +] diff --git a/src/sempy_labs/migration/_create_pqt_file.py b/src/sempy_labs/migration/_create_pqt_file.py new file mode 100644 index 00000000..fee5ec97 --- /dev/null +++ b/src/sempy_labs/migration/_create_pqt_file.py @@ -0,0 +1,239 @@ +import sempy +import sempy.fabric as fabric +import json, os, shutil +import xml.etree.ElementTree as ET +from sempy_labs._list_functions import list_tables +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from sempy._utils._log import log +from typing import Optional +import sempy_labs._icons as icons + + +@log +def create_pqt_file( + dataset: str, workspace: Optional[str] = None, file_name: Optional[str] = None +): + """ + Dynamically generates a `Power Query Template `_ file based on the semantic model. 
The .pqt file is saved within the Files section of your lakehouse. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + file_name : str, default=None + The name of the Power Query Template file to be generated. + Defaults to None which resolves to 'PowerQueryTemplate'. + """ + + if file_name is None: + file_name = "PowerQueryTemplate" + + lakeAttach = lakehouse_attached() + + if lakeAttach == False: + print( + f"{icons.red_dot} In order to run the 'create_pqt_file' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + folderPath = "/lakehouse/default/Files" + subFolderPath = os.path.join(folderPath, "pqtnewfolder") + os.makedirs(subFolderPath, exist_ok=True) + + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfT = list_tables(dataset, workspace) + dfE = fabric.list_expressions(dataset=dataset, workspace=workspace) + + # Check if M-partitions are used + if any(dfP["Source Type"] == "M"): + + class QueryMetadata: + def __init__( + self, + QueryName, + QueryGroupId=None, + LastKnownIsParameter=None, + LastKnownResultTypeName=None, + LoadEnabled=True, + IsHidden=False, + ): + self.QueryName = QueryName + self.QueryGroupId = QueryGroupId + self.LastKnownIsParameter = LastKnownIsParameter + self.LastKnownResultTypeName = LastKnownResultTypeName + self.LoadEnabled = LoadEnabled + self.IsHidden = IsHidden + + class RootObject: + def __init__( + self, DocumentLocale, EngineVersion, QueriesMetadata, QueryGroups=None + ): + if QueryGroups is None: + QueryGroups = [] + self.DocumentLocale = DocumentLocale + self.EngineVersion = EngineVersion + self.QueriesMetadata = QueriesMetadata + self.QueryGroups = QueryGroups + + # STEP 1: Create MashupDocument.pq + mdfileName = "MashupDocument.pq" + mdFilePath = os.path.join(subFolderPath, mdfileName) + sb = "section Section1;" + for table_name in dfP["Table Name"].unique(): + tName = '#"' + table_name + '"' + sourceExpression = dfT.loc[ + (dfT["Name"] == table_name), "Source Expression" + ].iloc[0] + refreshPolicy = dfT.loc[(dfT["Name"] == table_name), "Refresh Policy"].iloc[ + 0 + ] + sourceType = dfP.loc[(dfP["Table Name"] == table_name), "Source Type"].iloc[ + 0 + ] + + if sourceType == "M" or refreshPolicy: + sb = sb + "\n" + "shared " + tName + " = " + + partitions_in_table = dfP.loc[ + dfP["Table Name"] == table_name, "Partition Name" + ].unique() + + i = 1 + for partition_name in partitions_in_table: + pSourceType = dfP.loc[ + (dfP["Table Name"] == table_name) + & (dfP["Partition Name"] == partition_name), + "Source Type", + ].iloc[0] + pQuery = dfP.loc[ + (dfP["Table Name"] == table_name) + & (dfP["Partition Name"] == partition_name), + "Query", + ].iloc[0] + + if pQuery is not None: + pQueryNoSpaces = ( + pQuery.replace(" ", "") + .replace("\n", "") + .replace("\t", "") + .replace("\r", "") + ) + if pQueryNoSpaces.startswith('letSource=""'): + pQuery = 'let\n\tSource = ""\nin\n\tSource' + + if pSourceType == "M" and i == 1: + sb = sb + pQuery + ";" + elif refreshPolicy and i == 1: + sb = sb + sourceExpression + ";" + i += 1 + + for index, row in dfE.iterrows(): + expr = row["Expression"] + eName = row["Name"] + 
eName = '#"' + eName + '"' + sb = sb + "\n" + "shared " + eName + " = " + expr + ";" + + with open(mdFilePath, "w") as file: + file.write(sb) + + # STEP 2: Create the MashupMetadata.json file + mmfileName = "MashupMetadata.json" + mmFilePath = os.path.join(subFolderPath, mmfileName) + queryMetadata = [] + + for tName in dfP["Table Name"].unique(): + sourceType = dfP.loc[(dfP["Table Name"] == tName), "Source Type"].iloc[0] + refreshPolicy = dfT.loc[(dfT["Name"] == tName), "Refresh Policy"].iloc[0] + if sourceType == "M" or refreshPolicy: + queryMetadata.append( + QueryMetadata(tName, None, None, None, True, False) + ) + + for i, r in dfE.iterrows(): + eName = r["Name"] + eKind = r["Kind"] + if eKind == "M": + queryMetadata.append( + QueryMetadata(eName, None, None, None, True, False) + ) + else: + queryMetadata.append( + QueryMetadata(eName, None, None, None, False, False) + ) + + rootObject = RootObject("en-US", "2.126.453.0", queryMetadata) + + def obj_to_dict(obj): + if isinstance(obj, list): + return [obj_to_dict(e) for e in obj] + elif hasattr(obj, "__dict__"): + return {k: obj_to_dict(v) for k, v in obj.__dict__.items()} + else: + return obj + + jsonContent = json.dumps(obj_to_dict(rootObject), indent=4) + + with open(mmFilePath, "w") as json_file: + json_file.write(jsonContent) + + # STEP 3: Create Metadata.json file + mFileName = "Metadata.json" + mFilePath = os.path.join(subFolderPath, mFileName) + metaData = {"Name": "fileName", "Description": "", "Version": "1.0.0.0"} + jsonContent = json.dumps(metaData, indent=4) + + with open(mFilePath, "w") as json_file: + json_file.write(jsonContent) + + # STEP 4: Create [Content_Types].xml file: + ns = "http://schemas.openxmlformats.org/package/2006/content-types" + ET.register_namespace("", ns) + types = ET.Element("{%s}Types" % ns) + default1 = ET.SubElement( + types, + "{%s}Default" % ns, + {"Extension": "json", "ContentType": "application/json"}, + ) + default2 = ET.SubElement( + types, + "{%s}Default" % ns, + {"Extension": "pq", "ContentType": "application/x-ms-m"}, + ) + xmlDocument = ET.ElementTree(types) + xmlFileName = "[Content_Types].xml" + xmlFilePath = os.path.join(subFolderPath, xmlFileName) + xmlDocument.write( + xmlFilePath, xml_declaration=True, encoding="utf-8", method="xml" + ) + + # STEP 5: Zip up the 4 files + zipFileName = file_name + ".zip" + zipFilePath = os.path.join(folderPath, zipFileName) + shutil.make_archive(zipFilePath[:-4], "zip", subFolderPath) + + # STEP 6: Convert the zip file back into a .pqt file + newExt = ".pqt" + directory = os.path.dirname(zipFilePath) + fileNameWithoutExtension = os.path.splitext(os.path.basename(zipFilePath))[0] + newFilePath = os.path.join(directory, fileNameWithoutExtension + newExt) + shutil.move(zipFilePath, newFilePath) + + # STEP 7: Delete subFolder directory which is no longer needed + shutil.rmtree(subFolderPath, ignore_errors=True) + + print( + f"{icons.green_dot} '{file_name}.pqt' has been created based on the '{dataset}' semantic model in the '{workspace}' workspace within the Files section of your lakehouse." + ) + + else: + print( + f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace does not use Power Query so a Power Query Template file cannot be generated." 
+ ) diff --git a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py new file mode 100644 index 00000000..854b42a2 --- /dev/null +++ b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -0,0 +1,429 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import re, datetime, time +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs._helper_functions import ( + resolve_lakehouse_name, + resolve_lakehouse_id, + create_abfss_path, +) +from sempy_labs._tom import connect_semantic_model +from pyspark.sql import SparkSession +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_calc_tables_to_lakehouse( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Creates delta tables in your lakehouse based on the DAX expression of a calculated table in an import/DirectQuery semantic model. The DAX expression encapsulating the calculated table logic is stored in the new Direct Lake semantic model as model annotations. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
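+
+    Example (an illustrative call sketch; 'AdventureWorks' and 'MyLakehouse' are placeholder names)::
+
+        migrate_calc_tables_to_lakehouse(
+            dataset="AdventureWorks",
+            new_dataset="AdventureWorks Direct Lake",
+            lakehouse="MyLakehouse",
+        )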
+ """ + + workspace = fabric.resolve_workspace_name(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) + else: + lakehouse_workspace_id = fabric.resolve_workspace_id(lakehouse_workspace) + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + else: + lakehouse_id = resolve_lakehouse_id(lakehouse, lakehouse_workspace) + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + # dfC['Column Object'] = "'" + dfC['Table Name'] + "'[" + dfC['Column Name'] + "]" + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[(dfP["Source Type"] == "Calculated")] + dfP_filt = dfP_filt[ + ~dfP_filt["Query"].str.contains("NAMEOF") + ] # Remove field parameters + # dfC_CalcColumn = dfC[dfC['Type'] == 'Calculated'] + lakeTables = get_lakehouse_tables(lakehouse, lakehouse_workspace) + + # Do not execute the function if lakehouse tables already exist with the same name + killFunction = False + for i, r in dfP_filt.iterrows(): + tName = r["Table Name"] + dtName = tName.replace(" ", "_") + + if dtName in lakeTables["Table Name"].values: + print( + f"{icons.red_dot} The '{tName}' table already exists as '{dtName}' in the '{lakehouse}' lakehouse in the '{workspace}' workspace." + ) + killFunction = True + + if killFunction: + return + + spark = SparkSession.builder.getOrCreate() + + if len(dfP_filt) == 0: + print( + f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables." + ) + return + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + success = True + for t in tom.model.Tables: + if tom.is_auto_date_table(table_name=t.Name): + print( + f"{icons.yellow_dot} The '{t.Name}' table is an auto-datetime table and is not supported in the Direct Lake migration process. Please create a proper Date/Calendar table in your lakehoues and use it in your Direct Lake model." 
+ ) + else: + for p in t.Partitions: + if str(p.SourceType) == "Calculated": + query = p.Source.Expression + if "NAMEOF" not in query: # exclude field parameters + daxQuery = "" + if query.lower().startswith("calendar") and any( + str(c.Type) == "Calculated" for c in t.Columns + ): + daxQuery = f"ADDCOLUMNS(\n{query}," + for c in t.Columns: + if str(c.Type) == "Calculated": + expr = c.Expression + expr = expr.replace( + f"'{t.Name}'", "" + ).replace(f"{t.Name}[Date]", "[Date]") + expr = expr.replace( + "[MonthNo]", "MONTH([Date])" + ).replace( + "[QuarterNo]", + "INT((MONTH([Date]) + 2) / 3)", + ) + daxQuery = ( + f'{daxQuery}\n"{c.Name}",{expr},' + ) + daxQuery = ( + "EVALUATE\n" + daxQuery.rstrip(",") + "\n)" + ) + else: + daxQuery = f"EVALUATE\n{query}" + daxQueryTopN = ( + daxQuery.replace( + "EVALUATE\n", "EVALUATE\nTOPN(1," + ) + + ")" + ) + + try: + df = fabric.evaluate_dax( + dataset=dataset, + dax_string=daxQueryTopN, + workspace=workspace, + ) + + for col in df.columns: + pattern = r"\[([^\]]+)\]" + + matches = re.findall(pattern, col) + new_column_name = matches[0].replace( + " ", "" + ) + + df.rename( + columns={col: new_column_name}, + inplace=True, + ) + + try: + dataType = next( + str(c.DataType) + for c in tom.model.Tables[ + t.Name + ].Columns + if str(c.Type) + == "CalculatedTableColumn" + and c.SourceColumn == col + ) + except: + dataType = next( + str(c.DataType) + for c in tom.model.Tables[ + t.Name + ].Columns + if str(c.Type) == "Calculated" + and c.Name == new_column_name + ) + + if dataType == "Int64": + df[new_column_name] = df[ + new_column_name + ].astype(int) + elif dataType in ["Decimal", "Double"]: + df[new_column_name] = df[ + new_column_name + ].astype(float) + elif dataType == "Boolean": + df[new_column_name] = df[ + new_column_name + ].astype(bool) + elif dataType == "DateTime": + df[new_column_name] = pd.to_datetime( + df[new_column_name] + ) + + delta_table_name = t.Name.replace( + " ", "_" + ).lower() + + spark_df = spark.createDataFrame(df) + filePath = create_abfss_path( + lakehouse_id=lakehouse_id, + lakehouse_workspace_id=lakehouse_workspace_id, + delta_table_name=delta_table_name, + ) + spark_df.write.mode("overwrite").format( + "delta" + ).save(filePath) + + start_time2 = datetime.datetime.now() + timeout2 = datetime.timedelta(minutes=1) + success2 = False + + while not success2: + try: + with connect_semantic_model( + dataset=new_dataset, + readonly=False, + workspace=new_dataset_workspace, + ) as tom2: + success2 = True + tom2.set_annotation( + object=tom2.model, + name=t.Name, + value=daxQuery, + ) + except Exception as e: + if ( + datetime.datetime.now() + - start_time2 + > timeout2 + ): + break + time.sleep(1) + + print( + f"{icons.green_dot} Calculated table '{t.Name}' has been created as delta table '{delta_table_name.lower()}' in the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace." + ) + except: + print( + f"{icons.red_dot} Failed to create calculated table '{t.Name}' as a delta table in the lakehouse." + ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) + + +@log +def migrate_field_parameters( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Migrates field parameters from one semantic model to another. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. 
+    workspace : str, default=None
+        The Fabric workspace name in which the import/DirectQuery semantic model exists.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    new_dataset_workspace : str
+        The Fabric workspace name in which the Direct Lake semantic model will be created.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    from sempy_labs._helper_functions import format_dax_object_name
+
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+
+    if workspace == None:
+        workspace_id = fabric.get_workspace_id()
+        workspace = fabric.resolve_workspace_name(workspace_id)
+
+    if new_dataset_workspace == None:
+        new_dataset_workspace = workspace
+
+    dfC = fabric.list_columns(dataset=dataset, workspace=workspace)
+    dfC["Column Object"] = format_dax_object_name(
+        dfC["Table Name"], dfC["Column Name"]
+    )
+    dfP = fabric.list_partitions(dataset=dataset, workspace=workspace)
+    dfP_filt = dfP[(dfP["Source Type"] == "Calculated")]
+    dfP_filt = dfP_filt[
+        dfP_filt["Query"].str.contains("NAMEOF")
+    ]  # Only field parameters
+    dfC_CalcColumn = dfC[dfC["Type"] == "Calculated"]
+
+    if len(dfP_filt) == 0:
+        print(
+            f"{icons.green_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no field parameters."
+        )
+        return
+
+    start_time = datetime.datetime.now()
+    timeout = datetime.timedelta(minutes=1)
+    success = False
+
+    while not success:
+        try:
+            with connect_semantic_model(
+                dataset=new_dataset, workspace=new_dataset_workspace, readonly=False
+            ) as tom:
+                success = True
+
+                for i, r in dfP_filt.iterrows():
+                    tName = r["Table Name"]
+                    query = r["Query"]
+
+                    # For field parameters, remove calc columns from the query
+                    rows = query.strip().split("\n")
+                    filtered_rows = [
+                        row
+                        for row in rows
+                        if not any(
+                            value in row
+                            for value in dfC_CalcColumn["Column Object"].values
+                        )
+                    ]
+                    updated_query_string = "\n".join(filtered_rows)
+
+                    # Remove extra comma
+                    lines = updated_query_string.strip().split("\n")
+                    lines[-2] = lines[-2].rstrip(",")
+                    expr = "\n".join(lines)
+
+                    try:
+                        par = TOM.Partition()
+                        par.Name = tName
+
+                        parSource = TOM.CalculatedPartitionSource()
+                        par.Source = parSource
+                        parSource.Expression = expr
+
+                        tbl = TOM.Table()
+                        tbl.Name = tName
+                        tbl.Partitions.Add(par)
+
+                        columns = ["Value1", "Value2", "Value3"]
+
+                        for colName in columns:
+                            col = TOM.CalculatedTableColumn()
+                            col.Name = colName
+                            col.SourceColumn = "[" + colName + "]"
+                            col.DataType = TOM.DataType.String
+
+                            tbl.Columns.Add(col)
+
+                        tom.model.Tables.Add(tbl)
+
+                        ep = TOM.JsonExtendedProperty()
+                        ep.Name = "ParameterMetadata"
+                        ep.Value = '{"version":3,"kind":2}'
+
+                        rcd = TOM.RelatedColumnDetails()
+                        gpc = TOM.GroupByColumn()
+                        gpc.GroupingColumn = tom.model.Tables[tName].Columns["Value2"]
+                        rcd.GroupByColumns.Add(gpc)
+
+                        # Update column properties
+                        tom.model.Tables[tName].Columns["Value2"].IsHidden = True
+                        tom.model.Tables[tName].Columns["Value3"].IsHidden = True
+                        tom.model.Tables[tName].Columns[
+                            "Value3"
+                        ].DataType = TOM.DataType.Int64
+                        tom.model.Tables[tName].Columns["Value1"].SortByColumn = (
+                            tom.model.Tables[tName].Columns["Value3"]
+                        )
+                        tom.model.Tables[tName].Columns["Value2"].SortByColumn = (
+                            tom.model.Tables[tName].Columns["Value3"]
+                        )
+                        tom.model.Tables[tName].Columns[
+                            "Value2"
+                        ].ExtendedProperties.Add(ep)
+
tom.model.Tables[tName].Columns[ + "Value1" + ].RelatedColumnDetails = rcd + + dfC_filt1 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value1]") + ] + col1 = dfC_filt1["Column Name"].iloc[0] + dfC_filt2 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value2]") + ] + col2 = dfC_filt2["Column Name"].iloc[0] + dfC_filt3 = dfC[ + (dfC["Table Name"] == tName) & (dfC["Source"] == "[Value3]") + ] + col3 = dfC_filt3["Column Name"].iloc[0] + + tom.model.Tables[tName].Columns["Value1"].Name = col1 + tom.model.Tables[tName].Columns["Value2"].Name = col2 + tom.model.Tables[tName].Columns["Value3"].Name = col3 + + print( + f"{icons.green_dot} The '{tName}' table has been added as a field parameter to the '{new_dataset}' semantic model in the '{new_dataset_workspace}' workspace." + ) + except: + print( + f"{icons.red_dot} The '{tName}' table has not been added as a field parameter." + ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py b/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py new file mode 100644 index 00000000..1eb4cc68 --- /dev/null +++ b/src/sempy_labs/migration/_migrate_calctables_to_semantic_model.py @@ -0,0 +1,150 @@ +import sempy +import sempy.fabric as fabric +import re, datetime, time +from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs._tom import connect_semantic_model +from typing import Optional +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_calc_tables_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Creates new tables in the Direct Lake semantic model based on the lakehouse tables created using the 'migrate_calc_tables_to_lakehouse' function. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
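+
+    Example (an illustrative call sketch; 'AdventureWorks' and 'MyLakehouse' are placeholder names)::
+
+        migrate_calc_tables_to_semantic_model(
+            dataset="AdventureWorks",
+            new_dataset="AdventureWorks Direct Lake",
+            lakehouse="MyLakehouse",
+        )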
+ """ + + workspace = fabric.resolve_workspace_name(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + # Get calc tables but not field parameters + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dfP_filt = dfP[(dfP["Source Type"] == "Calculated")] + dfP_filt = dfP_filt[~dfP_filt["Query"].str.contains("NAMEOF")] + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + lc = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) + # Get all calc table columns of calc tables not including field parameters + dfC_filt = dfC[ + (dfC["Table Name"].isin(dfP_filt["Table Name"])) + ] # & (dfC['Type'] == 'CalculatedTableColumn')] + # dfA = list_annotations(new_dataset, new_dataset_workspace) + # dfA_filt = dfA[(dfA['Object Type'] == 'Model') & ~ (dfA['Annotation Value'].str.contains('NAMEOF'))] + + if len(dfP_filt) == 0: + print( + f"{icons.green_dot} The '{dataset}' semantic model has no calculated tables." + ) + return + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + for tName in dfC_filt["Table Name"].unique(): + if tName.lower() in lc["Table Name"].values: + + try: + tom.model.Tables[tName] + except: + tom.add_table(name=tName) + tom.add_entity_partition( + table_name=tName, + entity_name=tName.replace(" ", "_").lower(), + ) + + columns_in_table = dfC_filt.loc[ + dfC_filt["Table Name"] == tName, "Column Name" + ].unique() + + for cName in columns_in_table: + scName = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Source", + ].iloc[0] + cDataType = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Data Type", + ].iloc[0] + cType = dfC.loc[ + (dfC["Table Name"] == tName) + & (dfC["Column Name"] == cName), + "Type", + ].iloc[0] + + # av = tom.get_annotation_value(object = tom.model, name = tName) + + # if cType == 'CalculatedTableColumn': + # lakeColumn = scName.replace(' ','_') + # elif cType == 'Calculated': + pattern = r"\[([^]]+)\]" + + matches = re.findall(pattern, scName) + lakeColumn = matches[0].replace(" ", "") + try: + tom.model.Tables[tName].Columns[cName] + except: + tom.add_data_column( + table_name=tName, + column_name=cName, + source_column=lakeColumn, + data_type=cDataType, + ) + print( + f"{icons.green_dot} The '{tName}'[{cName}] column has been added." + ) + + print( + f"\n{icons.green_dot} All viable calculated tables have been added to the model." 
+ ) + + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py b/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py new file mode 100644 index 00000000..a24cf3c0 --- /dev/null +++ b/src/sempy_labs/migration/_migrate_model_objects_to_semantic_model.py @@ -0,0 +1,524 @@ +import sempy +import sempy.fabric as fabric +import re, datetime, time +from sempy_labs._list_functions import list_tables +from sempy_labs._helper_functions import create_relationship_name +from sempy_labs._tom import connect_semantic_model +from typing import Optional +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_model_objects_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +): + """ + Adds the rest of the model objects (besides tables/columns) and their properties to a Direct Lake semantic model based on an import/DirectQuery semantic model. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
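+
+    Example (an illustrative call sketch; the model names are placeholders)::
+
+        migrate_model_objects_to_semantic_model(
+            dataset="AdventureWorks",
+            new_dataset="AdventureWorks Direct Lake",
+        )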
+ """ + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + import System + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + else: + workspaceId = fabric.resolve_workspace_id(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + dfT = list_tables(dataset, workspace) + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfM = fabric.list_measures(dataset=dataset, workspace=workspace) + dfR = fabric.list_relationships(dataset=dataset, workspace=workspace) + dfRole = fabric.get_roles(dataset=dataset, workspace=workspace) + dfRLS = fabric.get_row_level_security_permissions( + dataset=dataset, workspace=workspace + ) + dfCI = fabric.list_calculation_items(dataset=dataset, workspace=workspace) + dfP = fabric.list_perspectives(dataset=dataset, workspace=workspace) + dfTranslation = fabric.list_translations(dataset=dataset, workspace=workspace) + dfH = fabric.list_hierarchies(dataset=dataset, workspace=workspace) + dfPar = fabric.list_partitions(dataset=dataset, workspace=workspace) + + dfP_cc = dfPar[(dfPar["Source Type"] == "Calculated")] + dfP_fp = dfP_cc[dfP_cc["Query"].str.contains("NAMEOF")] + dfC_fp = dfC[dfC["Table Name"].isin(dfP_fp["Table Name"].values)] + + print(f"{icons.in_progress} Updating '{new_dataset}' based on '{dataset}'...") + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + + isDirectLake = any( + str(p.Mode) == "DirectLake" + for t in tom.model.Tables + for p in t.Partitions + ) + + print(f"\n{icons.in_progress} Updating table properties...") + for t in tom.model.Tables: + t.IsHidden = bool(dfT.loc[dfT["Name"] == t.Name, "Hidden"].iloc[0]) + t.Description = dfT.loc[dfT["Name"] == t.Name, "Description"].iloc[ + 0 + ] + t.DataCategory = dfT.loc[ + dfT["Name"] == t.Name, "Data Category" + ].iloc[0] + + print( + f"{icons.green_dot} The '{t.Name}' table's properties have been updated." + ) + + print(f"\n{icons.in_progress} Updating column properties...") + for t in tom.model.Tables: + if ( + t.Name not in dfP_fp["Table Name"].values + ): # do not include field parameters + dfT_filtered = dfT[dfT["Name"] == t.Name] + tType = dfT_filtered["Type"].iloc[0] + for c in t.Columns: + if not c.Name.startswith("RowNumber-"): + dfC_filt = dfC[ + (dfC["Table Name"] == t.Name) + & (dfC["Column Name"] == c.Name) + ] + cName = dfC_filt["Column Name"].iloc[0] + c.Name = cName + if tType == "Table": + c.SourceColumn = cName.replace(" ", "_") + c.IsHidden = bool(dfC_filt["Hidden"].iloc[0]) + c.DataType = System.Enum.Parse( + TOM.DataType, dfC_filt["Data Type"].iloc[0] + ) + c.DisplayFolder = dfC_filt["Display Folder"].iloc[0] + c.FormatString = dfC_filt["Format String"].iloc[0] + c.SummarizeBy = System.Enum.Parse( + TOM.AggregateFunction, + dfC_filt["Summarize By"].iloc[0], + ) + c.DataCategory = dfC_filt["Data Category"].iloc[0] + c.IsKey = bool(dfC_filt["Key"].iloc[0]) + sbc = dfC_filt["Sort By Column"].iloc[0] + + if sbc != None: + try: + c.SortByColumn = tom.model.Tables[ + t.Name + ].Columns[sbc] + except: + print( + f"{icons.red_dot} Failed to create '{sbc}' as a Sort By Column for the '{c.Name}' in the '{t.Name}' table." 
+ ) + print( + f"{icons.green_dot} The '{t.Name}'[{c.Name}] column's properties have been updated." + ) + + print(f"\n{icons.in_progress} Creating hierarchies...") + dfH_grouped = ( + dfH.groupby( + [ + "Table Name", + "Hierarchy Name", + "Hierarchy Hidden", + "Hierarchy Description", + ] + ) + .agg({"Level Name": list, "Column Name": list}) + .reset_index() + ) + + for i, r in dfH_grouped.iterrows(): + tName = r["Table Name"] + hName = r["Hierarchy Name"] + hDesc = r["Hierarchy Description"] + hHid = bool(r["Hierarchy Hidden"]) + cols = r["Column Name"] + lvls = r["Level Name"] + + try: + tom.model.Tables[tName].Hierarchies[hName] + except: + tom.add_hierarchy( + table_name=tName, + hierarchy_name=hName, + hierarchy_description=hDesc, + hierarchy_hidden=hHid, + columns=cols, + levels=lvls, + ) + print( + f"{icons.green_dot} The '{hName}' hierarchy has been added." + ) + + print(f"\n{icons.in_progress} Creating measures...") + for i, r in dfM.iterrows(): + tName = r["Table Name"] + mName = r["Measure Name"] + mExpr = r["Measure Expression"] + mHidden = bool(r["Measure Hidden"]) + mDF = r["Measure Display Folder"] + mDesc = r["Measure Description"] + mFS = r["Format String"] + + try: + tom.model.Tables[tName].Measures[mName] + except: + tom.add_measure( + table_name=tName, + measure_name=mName, + expression=mExpr, + hidden=mHidden, + display_folder=mDF, + description=mDesc, + format_string=mFS, + ) + print( + f"{icons.green_dot} The '{mName}' measure has been added." + ) + + for cgName in dfCI["Calculation Group Name"].unique(): + + isHidden = bool( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Hidden" + ].iloc[0] + ) + prec = int( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Precedence" + ].iloc[0] + ) + desc = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName), "Description" + ].iloc[0] + + try: + tom.model.Tables[cgName] + except: + tom.add_calculation_group( + name=cgName, + description=desc, + precedence=prec, + hidden=isHidden, + ) + print( + f"{icons.green_dot} The '{cgName}' calculation group has been added." + ) + tom.model.DiscourageImplicitMeasures = True + + print( + f"\n{icons.in_progress} Updating calculation group column name..." + ) + dfC_filt = dfC[ + (dfC["Table Name"] == cgName) & (dfC["Hidden"] == False) + ] + colName = dfC_filt["Column Name"].iloc[0] + tom.model.Tables[cgName].Columns["Name"].Name = colName + + calcItems = dfCI.loc[ + dfCI["Calculation Group Name"] == cgName, + "Calculation Item Name", + ].unique() + + print(f"\n{icons.in_progress} Creating calculation items...") + for calcItem in calcItems: + ordinal = int( + dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Ordinal", + ].iloc[0] + ) + expr = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Expression", + ].iloc[0] + fse = dfCI.loc[ + (dfCI["Calculation Group Name"] == cgName) + & (dfCI["Calculation Item Name"] == calcItem), + "Format String Expression", + ].iloc[0] + try: + tom.model.Tables[cgName].CalculationGroup.CalculationItems[ + calcItem + ] + except: + tom.add_calculation_item( + table_name=cgName, + calculation_item_name=calcItem, + expression=expr, + format_string_expression=fse, + ordinal=ordinal, + ) + print( + f"{icons.green_dot} The '{calcItem}' has been added to the '{cgName}' calculation group." 
+ ) + + print(f"\n{icons.in_progress} Creating relationships...") + for index, row in dfR.iterrows(): + fromTable = row["From Table"] + fromColumn = row["From Column"] + toTable = row["To Table"] + toColumn = row["To Column"] + isActive = row["Active"] + cfb = row["Cross Filtering Behavior"] + sfb = row["Security Filtering Behavior"] + rori = row["Rely On Referential Integrity"] + mult = row["Multiplicity"] + + card_mapping = {"m": "Many", "1": "One", "0": "None"} + + fromCard = card_mapping.get(mult[0]) + toCard = card_mapping.get(mult[-1]) + + relName = create_relationship_name( + fromTable, fromColumn, toTable, toColumn + ) + + if any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} already exists as a relationship in the semantic model." + ) + elif isDirectLake and any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + and ( + r.FromColumn.DataType == "DateTime" + or r.ToColumn.DataType == "DateTime" + ) + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} was not created since relationships based on DateTime columns are not supported." + ) + elif isDirectLake and any( + r.FromTable.Name == fromTable + and r.FromColumn.Name == fromColumn + and r.ToTable.Name == toTable + and r.ToColumn.Name == toColumn + and (r.FromColumn.DataType != r.ToColumn.DataType) + for r in tom.model.Relationships + ): + print( + f"{icons.yellow_dot} {relName} was not created since columns used in a relationship must have the same data type." + ) + else: + try: + tom.add_relationship( + from_table=fromTable, + from_column=fromColumn, + to_table=toTable, + to_column=toColumn, + from_cardinality=fromCard, + to_cardinality=toCard, + cross_filtering_behavior=cfb, + security_filtering_behavior=sfb, + rely_on_referential_integrity=rori, + is_active=isActive, + ) + + print( + f"{icons.green_dot} The {relName} relationship has been added." + ) + except: + print( + f"{icons.red_dot} The {relName} relationship was not added." + ) + + print(f"\n{icons.in_progress} Creating roles...") + for index, row in dfRole.iterrows(): + roleName = row["Role"] + roleDesc = row["Description"] + modPerm = row["Model Permission"] + + try: + tom.model.Roles[roleName] + except: + tom.add_role( + role_name=roleName, + model_permission=modPerm, + description=roleDesc, + ) + print( + f"{icons.green_dot} The '{roleName}' role has been added." + ) + + print(f"\n{icons.in_progress} Creating row level security...") + for index, row in dfRLS.iterrows(): + roleName = row["Role"] + tName = row["Table"] + expr = row["Filter Expression"] + + try: + tom.set_rls( + role_name=roleName, table_name=tName, filter_expression=expr + ) + print( + f"{icons.green_dot} Row level security for the '{tName}' table within the '{roleName}' role has been set." + ) + except: + print( + f"{icons.red_dot} Row level security for the '{tName}' table within the '{roleName}' role was not set." + ) + + print(f"\n{icons.in_progress} Creating perspectives...") + for pName in dfP["Perspective Name"].unique(): + + try: + tom.model.Perspectives[pName] + except: + tom.add_perspective(perspective_name=pName) + print( + f"{icons.green_dot} The '{pName}' perspective has been added." 
+ ) + + print(f"\n{icons.in_progress} Adding objects to perspectives...") + for index, row in dfP.iterrows(): + pName = row["Perspective Name"] + tName = row["Table Name"] + oName = row["Object Name"] + oType = row["Object Type"] + tType = dfT.loc[(dfT["Name"] == tName), "Type"].iloc[0] + + try: + if oType == "Table": + tom.add_to_perspective( + object=tom.model.Tables[tName], perspective_name=pName + ) + elif oType == "Column": + tom.add_to_perspective( + object=tom.model.Tables[tName].Columns[oName], + perspective_name=pName, + ) + elif oType == "Measure": + tom.add_to_perspective( + object=tom.model.Tables[tName].Measures[oName], + perspective_name=pName, + ) + elif oType == "Hierarchy": + tom.add_to_perspective( + object=tom.model.Tables[tName].Hierarchies[oName], + perspective_name=pName, + ) + except: + pass + + print(f"\n{icons.in_progress} Creating translation languages...") + for trName in dfTranslation["Culture Name"].unique(): + try: + tom.model.Cultures[trName] + except: + tom.add_translation(trName) + print( + f"{icons.green_dot} The '{trName}' translation language has been added." + ) + + print(f"\n{icons.in_progress} Creating translation values...") + for index, row in dfTranslation.iterrows(): + trName = row["Culture Name"] + tName = row["Table Name"] + oName = row["Object Name"] + oType = row["Object Type"] + translation = row["Translation"] + prop = row["Property"] + + if prop == "Caption": + prop = "Name" + elif prop == "DisplayFolder": + prop = "Display Folder" + + try: + if oType == "Table": + tom.set_translation( + object=tom.model.Tables[tName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Column": + tom.set_translation( + object=tom.model.Tables[tName].Columns[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Measure": + tom.set_translation( + object=tom.model.Tables[tName].Measures[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Hierarchy": + tom.set_translation( + object=tom.model.Tables[tName].Hierarchies[oName], + language=trName, + property=prop, + value=translation, + ) + elif oType == "Level": + + pattern = r"\[([^]]+)\]" + matches = re.findall(pattern, oName) + lName = matches[0] + + pattern = r"'([^']+)'" + matches = re.findall(pattern, oName) + hName = matches[0] + tom.set_translation( + object=tom.model.Tables[tName] + .Hierarchies[hName] + .Levels[lName], + language=trName, + property=prop, + value=translation, + ) + except: + pass + + print( + f"\n{icons.green_dot} Migration of objects from '{dataset}' -> '{new_dataset}' is complete." 
+ ) + + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py b/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py new file mode 100644 index 00000000..5a9721d2 --- /dev/null +++ b/src/sempy_labs/migration/_migrate_tables_columns_to_semantic_model.py @@ -0,0 +1,165 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import datetime, time +from sempy_labs._list_functions import list_tables +from sempy_labs.directlake._get_shared_expression import get_shared_expression +from sempy_labs._helper_functions import resolve_lakehouse_name +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from sempy_labs._tom import connect_semantic_model +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def migrate_tables_columns_to_semantic_model( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, + lakehouse: Optional[str] = None, + lakehouse_workspace: Optional[str] = None, +): + """ + Adds tables/columns to the new Direct Lake semantic model based on an import/DirectQuery semantic model. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str, default=None + The Fabric lakehouse used by the Direct Lake semantic model. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str, default=None + The Fabric workspace used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
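+
+    A minimal usage sketch (hedged: the model names are illustrative, a lakehouse
+    is assumed to be attached to the notebook, and the import path assumes this
+    function is re-exported from sempy_labs.migration)::
+
+        from sempy_labs.migration import migrate_tables_columns_to_semantic_model
+
+        # Add the regular (non-calculated) tables and columns of the source model
+        # to the new Direct Lake model, pointing at the attached lakehouse.
+        migrate_tables_columns_to_semantic_model(
+            dataset="AdventureWorks",
+            new_dataset="AdventureWorks Direct Lake",
+        )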
+ + """ + + workspace = fabric.resolve_workspace_name(workspace) + + if new_dataset_workspace == None: + new_dataset_workspace = workspace + + if lakehouse_workspace == None: + lakehouse_workspace = new_dataset_workspace + + if lakehouse == None: + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) + + # Check that lakehouse is attached to the notebook + lakeAttach = lakehouse_attached() + + # Run if lakehouse is attached to the notebook or a lakehouse & lakehouse workspace are specified + if lakeAttach or (lakehouse is not None and lakehouse_workspace is not None): + shEx = get_shared_expression(lakehouse, lakehouse_workspace) + + dfC = fabric.list_columns(dataset=dataset, workspace=workspace) + dfT = list_tables(dataset, workspace) + dfT.rename(columns={"Type": "Table Type"}, inplace=True) + dfC = pd.merge( + dfC, + dfT[["Name", "Table Type"]], + left_on="Table Name", + right_on="Name", + how="left", + ) + dfT_filt = dfT[dfT["Table Type"] == "Table"] + dfC_filt = dfC[ + (dfC["Table Type"] == "Table") + & ~(dfC["Column Name"].str.startswith("RowNumber-")) + & (dfC["Type"] != "Calculated") + ] + + print(f"{icons.in_progress} Updating '{new_dataset}' based on '{dataset}'...") + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=new_dataset, readonly=False, workspace=new_dataset_workspace + ) as tom: + success = True + try: + tom.model.Expressions["DatabaseQuery"] + except: + tom.add_expression("DatabaseQuery", expression=shEx) + print( + f"{icons.green_dot} The 'DatabaseQuery' expression has been added." + ) + + for i, r in dfT_filt.iterrows(): + tName = r["Name"] + tDC = r["Data Category"] + tHid = bool(r["Hidden"]) + tDesc = r["Description"] + + try: + tom.model.Tables[tName] + except: + tom.add_table( + name=tName, + description=tDesc, + data_category=tDC, + hidden=tHid, + ) + tom.add_entity_partition( + table_name=tName, entity_name=tName.replace(" ", "_") + ) + print( + f"{icons.green_dot} The '{tName}' table has been added." + ) + + for i, r in dfC_filt.iterrows(): + tName = r["Table Name"] + cName = r["Column Name"] + scName = r["Source"].replace(" ", "_") + cHid = bool(r["Hidden"]) + cDataType = r["Data Type"] + + try: + tom.model.Tables[tName].Columns[cName] + except: + tom.add_data_column( + table_name=tName, + column_name=cName, + source_column=scName, + hidden=cHid, + data_type=cDataType, + ) + print( + f"{icons.green_dot} The '{tName}'[{cName}] column has been added." + ) + + print( + f"\n{icons.green_dot} All regular tables and columns have been added to the '{new_dataset}' semantic model." + ) + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) + else: + print( + f"{icons.red_dot} Lakehouse not attached to notebook and lakehouse/lakehouse_workspace are not specified. Please add your lakehouse to this notebook or specify the lakehouse/lakehouse_workspace parameters." 
+ ) + print( + f"To attach a lakehouse to a notebook, go to the the 'Explorer' window to the left, click 'Lakehouses' to add your lakehouse to this notebook" + ) + print( + f"\nLearn more here: https://learn.microsoft.com/fabric/data-engineering/lakehouse-notebook-explore#add-or-remove-a-lakehouse" + ) diff --git a/src/sempy_labs/migration/_migration_validation.py b/src/sempy_labs/migration/_migration_validation.py new file mode 100644 index 00000000..42935d6d --- /dev/null +++ b/src/sempy_labs/migration/_migration_validation.py @@ -0,0 +1,227 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +from sempy_labs._helper_functions import create_relationship_name +from sempy_labs._tom import connect_semantic_model +from typing import List, Optional, Union +from sempy._utils._log import log + + +def list_semantic_model_objects(dataset: str, workspace: Optional[str] = None): + """ + Shows a list of semantic model objects. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of objects in the semantic model + """ + + df = pd.DataFrame(columns=["Parent Name", "Object Name", "Object Type"]) + with connect_semantic_model( + dataset=dataset, workspace=workspace, readonly=True + ) as tom: + for t in tom.model.Tables: + if t.CalculationGroup is not None: + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": "Calculation Group", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for ci in t.CalculationGroup.CalculationItems: + new_data = { + "Parent Name": t.Name, + "Object Name": ci.Name, + "Object Type": str(ci.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + elif any(str(p.SourceType) == "Calculated" for p in t.Partitions): + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": "Calculated Table", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + new_data = { + "Parent Name": t.Parent.Name, + "Object Name": t.Name, + "Object Type": str(t.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for c in t.Columns: + if str(c.Type) != "RowNumber": + if str(c.Type) == "Calculated": + new_data = { + "Parent Name": c.Parent.Name, + "Object Name": c.Name, + "Object Type": "Calculated Column", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + else: + new_data = { + "Parent Name": c.Parent.Name, + "Object Name": c.Name, + "Object Type": str(c.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for m in t.Measures: + new_data = { + "Parent Name": m.Parent.Name, + "Object Name": m.Name, + "Object Type": str(m.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for h in t.Hierarchies: + new_data = { + "Parent Name": h.Parent.Name, + "Object Name": h.Name, + "Object Type": str(h.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for l in h.Levels: + new_data = { + "Parent Name": l.Parent.Name, + "Object Name": l.Name, + "Object Type": str(l.ObjectType), + 
} + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for p in t.Partitions: + new_data = { + "Parent Name": p.Parent.Name, + "Object Name": p.Name, + "Object Type": str(p.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for r in tom.model.Relationships: + rName = create_relationship_name( + r.FromTable.Name, r.FromColumn.Name, r.ToTable.Name, r.ToColumn.Name + ) + new_data = { + "Parent Name": r.Parent.Name, + "Object Name": rName, + "Object Type": str(r.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for role in tom.model.Roles: + new_data = { + "Parent Name": role.Parent.Name, + "Object Name": role.Name, + "Object Type": str(role.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for rls in role.TablePermissions: + new_data = { + "Parent Name": role.Name, + "Object Name": rls.Name, + "Object Type": str(rls.ObjectType), + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + for tr in tom.model.Cultures: + new_data = { + "Parent Name": tr.Parent.Name, + "Object Name": tr.Name, + "Object Type": str(tr.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + for per in tom.model.Perspectives: + new_data = { + "Parent Name": per.Parent.Name, + "Object Name": per.Name, + "Object Type": str(per.ObjectType), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +@log +def migration_validation( + dataset: str, + new_dataset: str, + workspace: Optional[str] = None, + new_dataset_workspace: Optional[str] = None, +) -> pd.DataFrame: + """ + Shows the objects in the original semantic model and whether then were migrated successfully or not. + + Parameters + ---------- + dataset : str + Name of the import/DirectQuery semantic model. + new_dataset : str + Name of the Direct Lake semantic model. + workspace : str, default=None + The Fabric workspace name in which the import/DirectQuery semantic model exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + new_dataset_workspace : str + The Fabric workspace name in which the Direct Lake semantic model will be created. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of objects and whether they were successfully migrated. Also shows the % of objects which were migrated successfully. 
+ """ + + dfA = list_semantic_model_objects(dataset=dataset, workspace=workspace) + dfB = list_semantic_model_objects( + dataset=new_dataset, workspace=new_dataset_workspace + ) + + def is_migrated(row): + if row["Object Type"] == "Calculated Table": + return ( + (dfB["Parent Name"] == row["Parent Name"]) + & (dfB["Object Name"] == row["Object Name"]) + & (dfB["Object Type"].isin(["Calculated Table", "Table"])) + ).any() + else: + return ( + (dfB["Parent Name"] == row["Parent Name"]) + & (dfB["Object Name"] == row["Object Name"]) + & (dfB["Object Type"] == row["Object Type"]) + ).any() + + dfA["Migrated"] = dfA.apply(is_migrated, axis=1) + + denom = len(dfA) + num = len(dfA[dfA["Migrated"]]) + print(f"{100 * round(num / denom,2)}% migrated") + + return dfA diff --git a/src/sempy_labs/migration/_refresh_calc_tables.py b/src/sempy_labs/migration/_refresh_calc_tables.py new file mode 100644 index 00000000..467b223a --- /dev/null +++ b/src/sempy_labs/migration/_refresh_calc_tables.py @@ -0,0 +1,129 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import re, datetime, time +from pyspark.sql import SparkSession +from sempy_labs._tom import connect_semantic_model +from typing import List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +@log +def refresh_calc_tables(dataset: str, workspace: Optional[str] = None): + """ + Recreates the delta tables in the lakehouse based on the DAX expressions stored as model annotations in the Direct Lake semantic model. + + Parameters + ---------- + dataset : str + Name of the semantic model. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
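+
+    A minimal usage sketch (hedged: the model name is illustrative, a lakehouse is
+    assumed to be attached to the notebook, and the import path assumes this
+    function is re-exported from sempy_labs.migration)::
+
+        from sempy_labs.migration import refresh_calc_tables
+
+        # Re-materialize the source model's calculated tables as delta tables
+        # in the lakehouse attached to this notebook.
+        refresh_calc_tables(dataset="AdventureWorks Direct Lake")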
+ """ + + spark = SparkSession.builder.getOrCreate() + + start_time = datetime.datetime.now() + timeout = datetime.timedelta(minutes=1) + success = False + + while not success: + try: + with connect_semantic_model( + dataset=dataset, readonly=True, workspace=workspace + ) as tom: + success = True + for a in tom.model.Annotations: + if any(a.Name == t.Name for t in tom.model.Tables): + tName = a.Name + query = a.Value + + if not query.startswith("EVALUATE"): + daxquery = "EVALUATE \n" + query + else: + daxquery = query + + try: + df = fabric.evaluate_dax( + dataset=dataset, + dax_string=daxquery, + workspace=workspace, + ) + + # Update column names for non-field parameters + if query.find("NAMEOF") == -1: + for old_column_name in df.columns: + pattern = r"\[([^\]]+)\]" + + matches = re.findall(pattern, old_column_name) + new_column_name = matches[0] + new_column_name = new_column_name.replace(" ", "") + + df.rename( + columns={old_column_name: new_column_name}, + inplace=True, + ) + + # Update data types for lakehouse columns + dataType = next( + str(c.DataType) + for c in tom.all_columns() + if c.Parent.Name == tName + and c.SourceColumn == new_column_name + ) + # dfC_type = dfC[(dfC['Table Name'] == tName) & (dfC['Source'] == new_column_name)] + # dataType = dfC_type['Data Type'].iloc[0] + + if dataType == "Int64": + df[new_column_name] = df[ + new_column_name + ].astype(int) + elif dataType in ["Decimal", "Double"]: + df[new_column_name] = df[ + new_column_name + ].astype(float) + elif dataType == "Boolean": + df[new_column_name] = df[ + new_column_name + ].astype(bool) + elif dataType == "DateTime": + df[new_column_name] = pd.to_datetime( + df[new_column_name] + ) + else: + df[new_column_name] = df[ + new_column_name + ].astype(str) + # else: + # second_column_name = df.columns[1] + # third_column_name = df.columns[2] + # df[third_column_name] = df[third_column_name].astype(int) + + # Remove calc columns from field parameters + # mask = df[second_column_name].isin(dfC_filt['Full Column Name']) + # df = df[~mask] + + delta_table_name = tName.replace(" ", "_") + print( + f"{icons.in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress..." + ) + + spark_df = spark.createDataFrame(df) + spark_df.write.mode("overwrite").format( + "delta" + ).saveAsTable(delta_table_name) + print( + f"{icons.green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse." + ) + except: + print( + f"{icons.red_dot} Failed to create calculated table '{tName}' as a delta table in the lakehouse." 
+ ) + + except Exception as e: + if datetime.datetime.now() - start_time > timeout: + break + time.sleep(1) diff --git a/src/sempy_labs/report/__init__.py b/src/sempy_labs/report/__init__.py new file mode 100644 index 00000000..51e905f8 --- /dev/null +++ b/src/sempy_labs/report/__init__.py @@ -0,0 +1,35 @@ +from sempy_labs.report._generate_report import ( + create_report_from_reportjson, + update_report_from_reportjson, +) +from sempy_labs.report._report_functions import ( + get_report_json, + # report_dependency_tree, + export_report, + clone_report, + launch_report, + # list_report_pages, + # list_report_visuals, + # list_report_bookmarks, + # translate_report_titles +) +from sempy_labs.report._report_rebind import ( + report_rebind, + report_rebind_all, +) + +__all__ = [ + "create_report_from_reportjson", + "update_report_from_reportjson", + "get_report_json", + # report_dependency_tree, + "export_report", + "clone_report", + "launch_report", + # list_report_pages, + # list_report_visuals, + # list_report_bookmarks, + # translate_report_titles, + "report_rebind", + "report_rebind_all", +] diff --git a/src/sempy_labs/report/_generate_report.py b/src/sempy_labs/report/_generate_report.py new file mode 100644 index 00000000..a9b560bf --- /dev/null +++ b/src/sempy_labs/report/_generate_report.py @@ -0,0 +1,253 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import json, base64, time +from typing import Optional +from sempy_labs._helper_functions import resolve_workspace_name_and_id + + +def create_report_from_reportjson( + report: str, + dataset: str, + report_json: str, + theme_json: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Creates a report based on a report.json file (and an optional themes.json file). + + Parameters + ---------- + report : str + Name of the report. + dataset : str + Name of the semantic model to connect to the report. + report_json : str + The report.json file to be used to create the report. + theme_json : str, default=None + The theme.json file to be used for the theme of the report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + objectType = "Report" + + dfI_m = fabric.list_items(workspace=workspace, type="SemanticModel") + dfI_model = dfI_m[(dfI_m["Display Name"] == dataset)] + + if len(dfI_model) == 0: + print( + f"ERROR: The '{dataset}' semantic model does not exist in the '{workspace}' workspace." + ) + return + + datasetId = dfI_model["Id"].iloc[0] + + dfI_r = fabric.list_items(workspace=workspace, type="Report") + dfI_rpt = dfI_r[(dfI_r["Display Name"] == report)] + + if len(dfI_rpt) > 0: + print( + f"WARNING: '{report}' already exists as a report in the '{workspace}' workspace." 
+ ) + return + + client = fabric.FabricRestClient() + defPBIR = { + "version": "1.0", + "datasetReference": { + "byPath": None, + "byConnection": { + "connectionString": None, + "pbiServiceModelId": None, + "pbiModelVirtualServerName": "sobe_wowvirtualserver", + "pbiModelDatabaseName": datasetId, + "name": "EntityDataSource", + "connectionType": "pbiServiceXmlaStyleLive", + }, + }, + } + + def conv_b64(file): + + loadJson = json.dumps(file) + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + + return f + + definitionPBIR = conv_b64(defPBIR) + payloadReportJson = conv_b64(report_json) + + if theme_json == None: + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": definitionPBIR, + "payloadType": "InlineBase64", + }, + ] + }, + } + else: + payloadThemeJson = conv_b64(theme_json) + themeID = theme_json["payload"]["blob"]["displayName"] + themePath = "StaticResources/SharedResources/BaseThemes/" + themeID + ".json" + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": themePath, + "payload": payloadThemeJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": definitionPBIR, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post(f"/v1/workspaces/{workspace_id}/items", json=request_body) + + if response.status_code == 201: + print("Report creation succeeded") + print(response.json()) + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + print("Report creation succeeded") + print(response.json()) + + +def update_report_from_reportjson( + report: str, report_json: str, workspace: Optional[str] = None +): + """ + Updates a report based on a report.json file. + + Parameters + ---------- + report : str + Name of the report. + report_json : str + The report.json file to be used to update the report. + workspace : str, default=None + The Fabric workspace name in which the report resides. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
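+
+    A minimal usage sketch (the report names are illustrative)::
+
+        from sempy_labs.report import get_report_json, update_report_from_reportjson
+
+        # Pull the report.json of one report and push it into another report.
+        rpt_json = get_report_json(report="Sales Report")
+        update_report_from_reportjson(
+            report="Sales Report (copy)", report_json=rpt_json
+        )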
+ """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + objectType = "Report" + + dfR = fabric.list_reports(workspace=workspace) + dfR_filt = dfR[(dfR["Name"] == report) & (dfR["Report Type"] == "PowerBIReport")] + + if len(dfR_filt) == 0: + print(f"The '{report}' report does not exist in the '{workspace}' workspace.") + return + + reportId = dfR_filt["Id"].iloc[0] + client = fabric.FabricRestClient() + + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{reportId}/getDefinition" + ) + df_items = pd.json_normalize(response.json()["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "definition.pbir"] + rptDefFile = df_items_filt["payload"].iloc[0] + # datasetId = dfR_filt['Dataset Id'].iloc[0] + # datasetWorkspaceId = dfR_filt['Dataset Workspace Id'].iloc[0] + + # defPBIR = { + # "version": "1.0", + # "datasetReference": { + # "byPath": None, + # "byConnection": { + # "connectionString": None, + # "pbiServiceModelId": None, + # "pbiModelVirtualServerName": "sobe_wowvirtualserver", + # "pbiModelDatabaseName": datasetId, + # "name": "EntityDataSource", + # "connectionType": "pbiServiceXmlaStyleLive" + # } + # } + # } + + def conv_b64(file): + + loadJson = json.dumps(file) + f = base64.b64encode(loadJson.encode("utf-8")).decode("utf-8") + + return f + + # definitionPBIR = conv_b64(defPBIR) + payloadReportJson = conv_b64(report_json) + + request_body = { + "displayName": report, + "type": objectType, + "definition": { + "parts": [ + { + "path": "report.json", + "payload": payloadReportJson, + "payloadType": "InlineBase64", + }, + { + "path": "definition.pbir", + "payload": rptDefFile, + "payloadType": "InlineBase64", + }, + ] + }, + } + + response = client.post( + f"/v1/workspaces/{workspace_id}/reports/{reportId}/updateDefinition", + json=request_body, + ) + + if response.status_code == 201: + print(f"The '{report}' report has been successfully updated.") + # print(response.json()) + elif response.status_code == 202: + operationId = response.headers["x-ms-operation-id"] + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + while response_body["status"] != "Succeeded": + time.sleep(3) + response = client.get(f"/v1/operations/{operationId}") + response_body = json.loads(response.content) + response = client.get(f"/v1/operations/{operationId}/result") + print(f"The '{report}' report has been successfully updated.") + # print(response.json()) diff --git a/src/sempy_labs/report/_report_functions.py b/src/sempy_labs/report/_report_functions.py new file mode 100644 index 00000000..4aa49675 --- /dev/null +++ b/src/sempy_labs/report/_report_functions.py @@ -0,0 +1,855 @@ +import sempy +import sempy.fabric as fabric +import pandas as pd +import json, os, time, base64, copy, re +from anytree import Node, RenderTree +from powerbiclient import Report +from synapse.ml.services import Translate +from pyspark.sql.functions import col, flatten +from pyspark.sql import SparkSession +from sempy_labs.report._generate_report import update_report_from_reportjson +from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from sempy_labs._helper_functions import ( + generate_embedded_filter, + resolve_dataset_name, + resolve_report_id, + resolve_lakehouse_name, + language_validate, + resolve_workspace_name_and_id, +) +from typing import Any, List, Optional, Union +from sempy._utils._log import log +import sempy_labs._icons as icons + + +def get_report_json( + report: str, + workspace: Optional[str] = 
None, + save_to_file_name: Optional[str] = None, +) -> Any: + """ + Gets the report.json file content of a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + save_to_file_name : str, default=None + Specifying this parameter will save the report.json file to the lakehouse attached to the notebook with the file name of this parameter. + + Returns + ------- + Any + The report.json file for a given Power BI report. + """ + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + client = fabric.FabricRestClient() + + dfI = fabric.list_items(workspace=workspace, type="Report") + dfI_filt = dfI[(dfI["Display Name"] == report)] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist in the '{workspace}' workspace." + ) + return + + itemId = dfI_filt["Id"].iloc[0] + response = client.post( + f"/v1/workspaces/{workspace_id}/items/{itemId}/getDefinition" + ) + df_items = pd.json_normalize(response.json()["definition"]["parts"]) + df_items_filt = df_items[df_items["path"] == "report.json"] + payload = df_items_filt["payload"].iloc[0] + + reportFile = base64.b64decode(payload).decode("utf-8") + reportJson = json.loads(reportFile) + + if save_to_file_name is not None: + lakeAttach = lakehouse_attached() + if lakeAttach == False: + print( + f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + lakehouse_id = fabric.get_lakehouse_id() + lakehouse = resolve_lakehouse_name(lakehouse_id, workspace) + folderPath = "/lakehouse/default/Files" + fileExt = ".json" + if not save_to_file_name.endswith(fileExt): + save_to_file_name = save_to_file_name + fileExt + filePath = os.path.join(folderPath, save_to_file_name) + with open(filePath, "w") as json_file: + json.dump(reportJson, json_file, indent=4) + print( + f"{icons.green_dot} The report.json file for the '{report}' report has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + ) + + return reportJson + + +def report_dependency_tree(workspace: Optional[str] = None): + """ + Prints a dependency between reports and semantic models. + + Parameters + ---------- + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+ """ + + if workspace == None: + workspaceId = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspaceId) + + dfR = fabric.list_reports(workspace=workspace) + dfD = fabric.list_datasets(workspace=workspace) + dfR = pd.merge( + dfR, + dfD[["Dataset ID", "Dataset Name"]], + left_on="Dataset Id", + right_on="Dataset ID", + how="left", + ) + dfR.rename(columns={"Name": "Report Name"}, inplace=True) + dfR = dfR[["Report Name", "Dataset Name"]] + + report_icon = "\U0001F4F6" + dataset_icon = "\U0001F9CA" + workspace_icon = "\U0001F465" + + node_dict = {} + rootNode = Node(workspace) + node_dict[workspace] = rootNode + rootNode.custom_property = workspace_icon + " " + + for i, r in dfR.iterrows(): + datasetName = r["Dataset Name"] + reportName = r["Report Name"] + parentNode = node_dict.get(datasetName) + if parentNode is None: + parentNode = Node(datasetName, parent=rootNode) + node_dict[datasetName] = parentNode + parentNode.custom_property = dataset_icon + " " + + child_node = Node(reportName, parent=parentNode) + child_node.custom_property = report_icon + " " + + # Print the tree structure + for pre, _, node in RenderTree(node_dict[workspace]): + print(f"{pre}{node.custom_property}'{node.name}'") + + +@log +def export_report( + report: str, + export_format: str, + file_name: Optional[str] = None, + bookmark_name: Optional[str] = None, + page_name: Optional[str] = None, + visual_name: Optional[str] = None, + report_filter: Optional[str] = None, + workspace: Optional[str] = None, +): + """ + Exports a Power BI report to a file in your lakehouse. + + Parameters + ---------- + report : str + Name of the Power BI report. + export_format : str + The format in which to export the report. For image formats, enter the file extension in this parameter, not 'IMAGE'. + `Valid formats `_ + file_name : str, default=None + The name of the file to be saved within the lakehouse. Do not include the file extension. Defaults ot the reportName parameter value. + bookmark_name : str, default=None + The name (GUID) of a bookmark within the report. + page_name : str, default=None + The name (GUID) of the report page. + visual_name : str, default=None + The name (GUID) of a visual. If you specify this parameter you must also specify the page_name parameter. + report_filter : str, default=None + A report filter to be applied when exporting the report. Syntax is user-friendly. See above for examples. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + # https://learn.microsoft.com/rest/api/power-bi/reports/export-to-file-in-group + + lakeAttach = lakehouse_attached() + + if lakeAttach == False: + print( + f"{icons.red_dot} In order to run the 'export_report' function, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + ) + return + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + if isinstance(page_name, str): + page_name = [page_name] + if isinstance(visual_name, str): + visual_name = [visual_name] + + if bookmark_name is not None and (page_name is not None or visual_name is not None): + print( + f"{icons.red_dot} If the 'bookmark_name' parameter is set, the 'page_name' and 'visual_name' parameters must not be set." 
+ ) + return + if visual_name is not None and page_name is None: + print( + f"{icons.red_dot} If the 'visual_name' parameter is set, the 'page_name' parameter must be set." + ) + return + + validFormats = { + "ACCESSIBLEPDF": ".pdf", + "CSV": ".csv", + "DOCX": ".docx", + "MHTML": ".mhtml", + "PDF": ".pdf", + "PNG": ".png", + "PPTX": ".pptx", + "XLSX": ".xlsx", + "XML": ".xml", + "BMP": ".bmp", + "EMF": ".emf", + "GIF": ".gif", + "JPEG": ".jpeg", + "TIFF": ".tiff", + } + + export_format = export_format.upper() + + fileExt = validFormats.get(export_format) + if fileExt is None: + print( + f"{icons.red_dot} The '{export_format}' format is not a valid format for exporting Power BI reports. Please enter a valid format. Options: {validFormats}" + ) + return + + if file_name == None: + file_name = report + fileExt + else: + file_name = file_name + fileExt + + folderPath = "/lakehouse/default/Files" + filePath = os.path.join(folderPath, file_name) + + dfI = fabric.list_items(workspace=workspace) + dfI_filt = dfI[ + (dfI["Type"].isin(["Report", "PaginatedReport"])) + & (dfI["Display Name"] == report) + ] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist in the '{workspace}' workspace." + ) + return + + reportType = dfI_filt["Type"].iloc[0] + + # Limitations + pbiOnly = ["PNG"] + paginatedOnly = [ + "ACCESSIBLEPDF", + "CSV", + "DOCX", + "BMP", + "EMF", + "GIF", + "JPEG", + "TIFF", + "MHTML", + "XLSX", + "XML", + ] + + if reportType == "Report" and export_format in paginatedOnly: + print( + f"{icons.red_dot} The '{export_format}' format is only supported for paginated reports." + ) + return + if reportType == "PaginatedReport" and export_format in pbiOnly: + print( + f"{icons.red_dot} The '{export_format}' format is only supported for Power BI reports." + ) + return + + if reportType == "PaginatedReport" and ( + bookmark_name is not None or page_name is not None or visual_name is not None + ): + print( + f"{icons.red_dot} Export for paginated reports does not support bookmarks/pages/visuals. Those parameters must not be set for paginated reports." + ) + return + + reportId = dfI_filt["Id"].iloc[0] + client = fabric.PowerBIRestClient() + + dfVisual = list_report_visuals(report=report, workspace=workspace) + dfPage = list_report_pages(report=report, workspace=workspace) + + if ( + export_format in ["BMP", "EMF", "GIF", "JPEG", "TIFF"] + and reportType == "PaginatedReport" + ): + request_body = { + "format": "IMAGE", + "paginatedReportConfiguration": { + "formatSettings": {"OutputFormat": export_format.lower()} + }, + } + elif bookmark_name is None and page_name is None and visual_name is None: + request_body = {"format": export_format} + elif bookmark_name is not None: + if reportType == "Report": + request_body = { + "format": export_format, + "powerBIReportConfiguration": { + "defaultBookmark": {"name": bookmark_name} + }, + } + elif page_name is not None and visual_name is None: + if reportType == "Report": + request_body = {"format": export_format, "powerBIReportConfiguration": {}} + + request_body["powerBIReportConfiguration"]["pages"] = [] + + for page in page_name: + dfPage_filt = dfPage[dfPage["Page ID"] == page] + if len(dfPage_filt) == 0: + print( + f"{icons.red_dot} The '{page}' page does not exist in the '{report}' report within the '{workspace}' workspace." 
+ ) + return + page_dict = {"pageName": page} + request_body["powerBIReportConfiguration"]["pages"].append(page_dict) + + elif page_name is not None and visual_name is not None: + if len(page_name) != len(visual_name): + print( + f"{icons.red_dot} Each 'visual_name' must map to a single 'page_name'." + ) + return + if reportType == "Report": + request_body = {"format": export_format, "powerBIReportConfiguration": {}} + + request_body["powerBIReportConfiguration"]["pages"] = [] + a = 0 + for page in page_name: + visual = visual_name[a] + dfVisual_filt = dfVisual[ + (dfVisual["Page ID"] == page) & (dfVisual["Visual ID"] == visual) + ] + if len(dfVisual_filt) == 0: + print( + f"{icons.red_dot} The '{visual}' visual does not exist on the '{page}' in the '{report}' report within the '{workspace}' workspace." + ) + return + page_dict = {"pageName": page, "visualName": visual} + request_body["powerBIReportConfiguration"]["pages"].append(page_dict) + a += 1 + + # Transform and add report filter if it is specified + if report_filter is not None and reportType == "Report": + reportFilter = generate_embedded_filter(filter=report_filter) + report_level_filter = {"filter": reportFilter} + + if "powerBIReportConfiguration" not in request_body: + request_body["powerBIReportConfiguration"] = {} + request_body["powerBIReportConfiguration"]["reportLevelFilters"] = [ + report_level_filter + ] + print(request_body) + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/ExportTo", + json=request_body, + ) + if response.status_code == 202: + response_body = json.loads(response.content) + exportId = response_body["id"] + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}" + ) + response_body = json.loads(response.content) + while response_body["status"] not in ["Succeeded", "Failed"]: + time.sleep(3) + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}" + ) + response_body = json.loads(response.content) + if response_body["status"] == "Failed": + print( + f"{icons.red_dot} The export for the '{report}' report within the '{workspace}' workspace in the '{export_format}' format has failed." + ) + else: + response = client.get( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/exports/{exportId}/file" + ) + print( + f"{icons.in_progress} Saving the '{export_format}' export for the '{report}' report within the '{workspace}' workspace to the lakehouse..." + ) + with open(filePath, "wb") as export_file: + export_file.write(response.content) + print( + f"{icons.green_dot} The '{export_format}' export for the '{report}' report within the '{workspace}' workspace has been saved to the following location: '{filePath}'." + ) + + +def clone_report( + report: str, + cloned_report: str, + workspace: Optional[str] = None, + target_workspace: Optional[str] = None, + target_dataset: Optional[str] = None, +): + """ + Clones a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + cloned_report : str + Name of the new Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + target_workspace : str, default=None + The name of the Fabric workspace to place the cloned report. 
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + target_dataset : str, default=None + The name of the semantic model to be used by the cloned report. + Defaults to None which resolves to the semantic model used by the initial report. + """ + + # https://learn.microsoft.com/rest/api/power-bi/reports/clone-report-in-group + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + dfI = fabric.list_items(workspace=workspace, type="Report") + dfI_filt = dfI[(dfI["Display Name"] == report)] + + if len(dfI_filt) == 0: + print( + f"{icons.red_dot} The '{report}' report does not exist within the '{workspace}' workspace." + ) + return + + reportId = resolve_report_id(report, workspace) + + if target_workspace is None: + target_workspace = workspace + target_workspace_id = workspace_id + else: + dfW = fabric.list_workspaces() + dfW_filt = dfW[dfW["Name"] == target_workspace] + + if len(dfW_filt) == 0: + print(f"{icons.red_dot} The '{workspace}' is not a valid workspace.") + return + target_workspace_id = dfW_filt["Id"].iloc[0] + + if target_dataset == None: + dfR = fabric.list_reports(workspace=target_workspace) + dfR_filt = dfR[dfR["Name"] == report] + target_dataset_id = dfR_filt["Dataset Id"].iloc[0] + target_dataset = resolve_dataset_name( + dataset_id=target_dataset_id, workspace=target_workspace + ) + else: + dfD = fabric.list_datasets(workspace=target_workspace) + dfD_filt = dfD[dfD["Dataset Name"] == target_dataset] + + if len(dfD_filt) == 0: + print( + f"{icons.red_dot} The '{target_dataset}' target dataset does not exist in the '{target_workspace}' workspace." + ) + return + target_dataset_id = dfD_filt["Dataset Id"].iloc[0] + + client = fabric.PowerBIRestClient() + + if target_workspace is None and target_dataset is None: + request_body = {"name": cloned_report} + elif target_workspace is not None and target_dataset is None: + request_body = {"name": cloned_report, "targetWorkspaceId": target_workspace_id} + elif target_workspace is not None and target_dataset is not None: + request_body = { + "name": cloned_report, + "targetModelId": target_dataset_id, + "targetWorkspaceId": target_workspace_id, + } + elif target_workspace is None and target_dataset is not None: + request_body = {"name": cloned_report, "targetModelId": target_dataset_id} + + response = client.post( + f"/v1.0/myorg/groups/{workspace_id}/reports/{reportId}/Clone", json=request_body + ) + + if response.status_code == 200: + print( + f"{icons.green_dot} The '{report}' report has been successfully cloned as the '{cloned_report}' report within the '{target_workspace}' workspace using the '{target_dataset}' semantic model." + ) + else: + print( + f"{icons.red_dot} POST request failed with status code: {response.status_code}" + ) + + +def launch_report(report: str, workspace: Optional[str] = None): + """ + Shows a Power BI report within a Fabric notebook. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + str + An embedded Power BI report within the notebook. 
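+
+    A minimal usage sketch (the report name is illustrative)::
+
+        from sempy_labs.report import launch_report
+
+        # Embed the report in the notebook output cell.
+        rpt = launch_report(report="Sales Report")
+        rpt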
+ """ + + from .HelperFunctions import resolve_report_id + + (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) + + reportId = resolve_report_id(report, workspace) + + report = Report(group_id=workspace_id, report_id=reportId) + + return report + + +def list_report_pages(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all pages within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the pages within a Power BI report and their properties. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + df = pd.DataFrame( + columns=["Page ID", "Page Name", "Hidden", "Width", "Height", "Visual Count"] + ) + + reportJson = get_report_json(report=report, workspace=workspace) + + for section in reportJson["sections"]: + pageID = section["name"] + pageName = section["displayName"] + # pageFilters = section['filters'] + pageWidth = section["width"] + pageHeight = section["height"] + visualCount = len(section["visualContainers"]) + pageHidden = False + pageConfig = section["config"] + pageConfigJson = json.loads(pageConfig) + + try: + pageH = pageConfigJson["visibility"] + if pageH == 1: + pageHidden = True + except: + pass + + new_data = { + "Page ID": pageID, + "Page Name": pageName, + "Hidden": pageHidden, + "Width": pageWidth, + "Height": pageHeight, + "Visual Count": visualCount, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + df["Hidden"] = df["Hidden"].astype(bool) + intCol = ["Width", "Height", "Visual Count"] + df[intCol] = df[intCol].astype(int) + + return df + + +def list_report_visuals(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all visuals within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the visuals within a Power BI report and their properties. 
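+
+    A minimal usage sketch (hedged: this function is not re-exported in this PR's
+    sempy_labs.report.__init__, so it is imported from its module; the report name
+    is illustrative)::
+
+        from sempy_labs.report._report_functions import list_report_visuals
+
+        # List every visual on every page of the report, with its title.
+        dfV = list_report_visuals(report="Sales Report")
+        dfV[dfV["Title"] != ""]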
+ """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + reportJson = get_report_json(report=report, workspace=workspace) + + df = pd.DataFrame(columns=["Page Name", "Page ID", "Visual ID", "Title"]) + + for section in reportJson["sections"]: + pageID = section["name"] + pageName = section["displayName"] + + for visual in section["visualContainers"]: + visualConfig = visual["config"] + visualConfigJson = json.loads(visualConfig) + visualID = visualConfigJson["name"] + + try: + title = visualConfigJson["singleVisual"]["vcObjects"]["title"][0][ + "properties" + ]["text"]["expr"]["Literal"]["Value"] + title = title[1:-1] + except: + title = "" + + new_data = { + "Page Name": pageName, + "Page ID": pageID, + "Visual ID": visualID, + "Title": title, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def list_report_bookmarks(report: str, workspace: Optional[str] = None): + """ + Shows the properties of all bookmarks within a Power BI report. + + Parameters + ---------- + report : str + Name of the Power BI report. + workspace : str, default=None + The Fabric workspace name. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the bookmarks within a Power BI report and their properties. + """ + + if workspace == None: + workspace_id = fabric.get_workspace_id() + workspace = fabric.resolve_workspace_name(workspace_id) + + df = pd.DataFrame( + columns=[ + "Bookmark ID", + "Bookmark Name", + "Page ID", + "Visual ID", + "Visual Hidden", + ] + ) + + reportJson = get_report_json(report=report, workspace=workspace) + reportConfig = reportJson["config"] + reportConfigJson = json.loads(reportConfig) + + try: + for bookmark in reportConfigJson["bookmarks"]: + bID = bookmark["name"] + bName = bookmark["displayName"] + rptPageId = bookmark["explorationState"]["activeSection"] + + for rptPg in bookmark["explorationState"]["sections"]: + for vc in bookmark["explorationState"]["sections"][rptPg][ + "visualContainers" + ]: + vHidden = False + try: + hidden = bookmark["explorationState"]["sections"][rptPg][ + "visualContainers" + ][vc]["singleVisual"]["display"]["mode"] + if hidden == "hidden": + vHidden = True + except: + pass + + new_data = { + "Bookmark ID": bID, + "Bookmark Name": bName, + "Page ID": rptPageId, + "Visual ID": vc, + "Visual Hidden": vHidden, + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + listPages = list_report_pages(report=report, workspace=workspace) + + df = pd.merge(df, listPages[["Page ID", "Page Name"]], on="Page ID", how="left") + df = df[ + [ + "Bookmark ID", + "Bookmark Name", + "Page ID", + "Page Name", + "Visual ID", + "Visual Hidden", + ] + ] + + return df + + except: + print( + f"The '{report}' report within the '{workspace}' workspace has no bookmarks." + ) + + +def translate_report_titles( + report: str, languages: Union[str, List[str]], workspace: Optional[str] = None +): + """ + Dynamically generates new Power BI reports which have report titles translated into the specified language(s). + + Parameters + ---------- + report : str + Name of the Power BI report. + languages : str, List[str] + The language code(s) in which to translate the report titles. + workspace : str, default=None + The Fabric workspace name. 
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    if isinstance(languages, str):
+        languages = [languages]
+
+    for lang in languages:
+        language_validate(lang)
+
+    reportJson = get_report_json(report=report, workspace=workspace)
+    dfV = list_report_visuals(report=report, workspace=workspace)
+    spark = SparkSession.builder.getOrCreate()
+    df = spark.createDataFrame(dfV)
+    columnToTranslate = "Title"
+
+    translate = (
+        Translate()
+        .setTextCol(columnToTranslate)
+        .setToLanguage(languages)
+        .setOutputCol("translation")
+        .setConcurrency(5)
+    )
+
+    transDF = (
+        translate.transform(df)
+        .withColumn("translation", flatten(col("translation.translations")))
+        .withColumn("translation", col("translation.text"))
+        .select("Visual ID", columnToTranslate, "translation")
+    )
+
+    df_panda = transDF.toPandas()
+
+    i = 0
+    for lang in languages:
+        # Clone report
+        language = language_validate(lang)
+        clonedReportName = f"{report}_{language}"
+
+        dfRep = fabric.list_reports(workspace=workspace)
+        dfRep_filt = dfRep[
+            (dfRep["Name"] == clonedReportName)
+            & (dfRep["Report Type"] == "PowerBIReport")
+        ]
+
+        if len(dfRep_filt) > 0:
+            print(
+                f"{icons.yellow_dot} The '{clonedReportName}' report already exists in the '{workspace}' workspace."
+            )
+        else:
+            clone_report(
+                report=report, cloned_report=clonedReportName, workspace=workspace
+            )
+            print(
+                f"{icons.green_dot} The '{clonedReportName}' report has been created via clone in the '{workspace}' workspace."
+            )
+
+        rptJsonTr = copy.deepcopy(reportJson)
+
+        # Update report json file
+        for section in rptJsonTr["sections"]:
+            for visual in section["visualContainers"]:
+                visualConfig = visual["config"]
+                visualConfigJson = json.loads(visualConfig)
+                visualID = visualConfigJson["name"]
+
+                df_filt = df_panda[
+                    (df_panda["Visual ID"] == visualID) & (df_panda["Title"] != "")
+                ]
+
+                if len(df_filt) == 1:
+                    tr = df_filt["translation"].str[i].iloc[0]
+                    if len(tr) > 0:
+                        prop = visualConfigJson["singleVisual"]["vcObjects"]["title"][
+                            0
+                        ]["properties"]["text"]["expr"]["Literal"]
+                        prop["Value"] = f"'{tr}'"
+
+                visual["config"] = json.dumps(visualConfigJson)
+
+        i += 1
+
+        # Post updated report json file to cloned report
+        update_report_from_reportjson(
+            report=clonedReportName, report_json=rptJsonTr, workspace=workspace
+        )
+        print(
+            f"{icons.green_dot} The visual titles within the '{clonedReportName}' report within the '{workspace}' workspace have been translated into '{language}' accordingly."
+        )
diff --git a/sempy_labs/ReportRebind.py b/src/sempy_labs/report/_report_rebind.py
similarity index 66%
rename from sempy_labs/ReportRebind.py
rename to src/sempy_labs/report/_report_rebind.py
index 844cc0b1..6d663ab4 100644
--- a/sempy_labs/ReportRebind.py
+++ b/src/sempy_labs/report/_report_rebind.py
@@ -1,17 +1,18 @@
 import sempy
 import sempy.fabric as fabric
-from .HelperFunctions import resolve_dataset_id, resolve_report_id
+from sempy_labs._helper_functions import resolve_dataset_id, resolve_report_id
 from typing import List, Optional, Union
 from sempy._utils._log import log
+import sempy_labs._icons as icons
 
-green_dot = '\U0001F7E2'
-yellow_dot = '\U0001F7E1'
-red_dot = '\U0001F534'
-in_progress = '⌛'
 
 @log
-def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = None, dataset_workspace: Optional[str] = None):
-
+def report_rebind(
+    report: str,
+    dataset: str,
+    report_workspace: Optional[str] = None,
+    dataset_workspace: Optional[str] = None,
+):
     """
     Rebinds a report to a semantic model.
 
@@ -32,7 +33,7 @@ def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = N
 
     Returns
     -------
-
+
     """
 
     if report_workspace == None:
@@ -41,28 +42,39 @@ def report_rebind(report: str, dataset: str, report_workspace: Optional[str] = N
     else:
         report_workspace_id = fabric.resolve_workspace_id(report_workspace)
     if dataset_workspace == None:
-        dataset_workspace = report_workspace
+        dataset_workspace = report_workspace
 
     client = fabric.PowerBIRestClient()
 
-    reportId = resolve_report_id(report = report, workspace = report_workspace)
-    datasetId = resolve_dataset_id(dataset = dataset, workspace = dataset_workspace)
+    reportId = resolve_report_id(report=report, workspace=report_workspace)
+    datasetId = resolve_dataset_id(dataset=dataset, workspace=dataset_workspace)
 
     # Prepare API
-    request_body = {
-        'datasetId': datasetId
-    }
+    request_body = {"datasetId": datasetId}
 
-    response = client.post(f"/v1.0/myorg/groups/{report_workspace_id}/reports/{reportId}/Rebind",json=request_body)
+    response = client.post(
+        f"/v1.0/myorg/groups/{report_workspace_id}/reports/{reportId}/Rebind",
+        json=request_body,
+    )
 
     if response.status_code == 200:
-        print(f"{green_dot} The '{report}' report has been successfully rebinded to the '{dataset}' semantic model.")
+        print(
+            f"{icons.green_dot} The '{report}' report has been successfully rebound to the '{dataset}' semantic model."
+        )
     else:
-        print(f"{red_dot} The '{report}' report within the '{report_workspace}' workspace failed to rebind to the '{dataset}' semantic model within the '{dataset_workspace}' workspace.")
+        print(
+            f"{icons.red_dot} The '{report}' report within the '{report_workspace}' workspace failed to rebind to the '{dataset}' semantic model within the '{dataset_workspace}' workspace."
+        )
 
-@log
-def report_rebind_all(dataset: str, new_dataset: str, dataset_workspace: Optional[str] = None, new_dataset_workpace: Optional[str] = None, report_workspace: Optional[str] = None):
+
+@log
+def report_rebind_all(
+    dataset: str,
+    new_dataset: str,
+    dataset_workspace: Optional[str] = None,
+    new_dataset_workpace: Optional[str] = None,
+    report_workspace: Optional[str] = None,
+):
     """
     Rebinds all reports in a workspace which are bound to a specific semantic model to a new semantic model.
 
@@ -86,29 +98,34 @@ def report_rebind_all(dataset: str, new_dataset: str, dataset_workspace: Optiona
         The name of the Fabric workspace in which the report resides.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
-
+
     Returns
     -------
-
+
     """
 
     if dataset_workspace == None:
         dataset_workspace_id = fabric.get_workspace_id()
         dataset_workspace = fabric.resolve_workspace_name(dataset_workspace_id)
     else:
-        dataset_workspace_id = fabric.resolve_workspace_id(dataset_workspace)
+        dataset_workspace_id = fabric.resolve_workspace_id(dataset_workspace)
     if new_dataset_workpace == None:
         new_dataset_workpace = dataset_workspace
     if report_workspace == None:
         report_workspace = dataset_workspace
-
+
     datasetId = resolve_dataset_id(dataset, dataset_workspace)
 
-    dfRep = fabric.list_reports(workspace = report_workspace)
-    dfRep_filt = dfRep[dfRep['Dataset Id'] == datasetId]
+    dfRep = fabric.list_reports(workspace=report_workspace)
+    dfRep_filt = dfRep[dfRep["Dataset Id"] == datasetId]
 
     for i, r in dfRep_filt.iterrows():
-        rptName = r['Name']
-        report_rebind(report = rptName, dataset = new_dataset, report_workspace = report_workspace, dataset_workspace = new_dataset_workpace)
\ No newline at end of file
+        rptName = r["Name"]
+        report_rebind(
+            report=rptName,
+            dataset=new_dataset,
+            report_workspace=report_workspace,
+            dataset_workspace=new_dataset_workpace,
+        )
diff --git a/tests/test_shortcuts.py b/tests/test_shortcuts.py
index b56057f7..22b0f872 100644
--- a/tests/test_shortcuts.py
+++ b/tests/test_shortcuts.py
@@ -1,6 +1,6 @@
 import pandas as pd
 from json import loads
-from sempy_labs.shortcuts import create_shortcut_onelake
+from sempy_labs.lakehouse._shortcuts import create_shortcut_onelake
 from unittest.mock import MagicMock, PropertyMock, patch
 
 
diff --git a/tests/test_tom.py b/tests/test_tom.py
new file mode 100644
index 00000000..bbc130e1
--- /dev/null
+++ b/tests/test_tom.py
@@ -0,0 +1,31 @@
+import sempy.fabric
+from unittest.mock import patch
+from sempy_labs import connect_semantic_model
+
+
+@patch("sempy.fabric.resolve_workspace_name")
+@patch("sempy.fabric.create_tom_server")
+def test_tom_wrapper(create_tom_server, resolve_workspace_name):
+
+    sempy.fabric._client._utils._init_analysis_services()
+    import Microsoft.AnalysisServices.Tabular as TOM
+
+    # create dummy server, database and model
+    tom_server = TOM.Server()
+
+    db = TOM.Database()
+    db.Name = "my_dataset"
+    db.ID = "my_dataset"
+    db.Model = TOM.Model()
+    tom_server.Databases.Add(db)
+
+    create_tom_server.return_value = tom_server
+
+    resolve_workspace_name.return_value = "my_workspace"
+
+    # invoke the wrapper
+    with connect_semantic_model("my_dataset") as tom:
+        tom.add_table("my_table")
+
+    # validate the result
+    assert tom_server.Databases["my_dataset"].Model.Tables["my_table"] is not None
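
For orientation, here is a minimal usage sketch of the report metadata helpers introduced above (`list_report_pages`, `list_report_visuals`, `list_report_bookmarks`). It assumes these functions are re-exported from the `sempy_labs.report` sub-package created in this change (adjust the import if the public surface differs), and the report and workspace names are placeholders.

```python
# Minimal sketch, assuming the helpers are exposed via sempy_labs.report.
from sempy_labs import report as rep

# Placeholder report/workspace names.
pages_df = rep.list_report_pages(report="AdventureWorks", workspace="Sales")
visuals_df = rep.list_report_visuals(report="AdventureWorks", workspace="Sales")
bookmarks_df = rep.list_report_bookmarks(report="AdventureWorks", workspace="Sales")

# Example checks: hidden pages and visuals without a title.
print(pages_df[pages_df["Hidden"]])
print(visuals_df[visuals_df["Title"] == ""])
```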
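`translate_report_titles` clones the source report once per language code and rewrites each clone's visual titles via the `Translate` transformer used in the module above, so it is intended to run inside a Fabric notebook with a Spark session available. A hedged sketch, again assuming a `sempy_labs.report` re-export and placeholder names:

```python
# Sketch only; 'AdventureWorks', 'Sales' and the language codes are placeholders.
from sempy_labs import report as rep

rep.translate_report_titles(
    report="AdventureWorks",
    languages=["fr", "de"],
    workspace="Sales",
)
```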
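`report_rebind` points a single report at a different semantic model, and `report_rebind_all` does the same for every report bound to a given model. The sketch below imports from the private module path shown in this diff; prefer a public re-export if one exists. Report and model names are placeholders.

```python
# Sketch of rebinding after a semantic model swap; names are placeholders.
from sempy_labs.report._report_rebind import report_rebind, report_rebind_all

# Rebind one report to a new semantic model in the same workspace.
report_rebind(report="Sales Overview", dataset="Sales Model v2")

# Rebind every report currently bound to the old model.
report_rebind_all(dataset="Sales Model", new_dataset="Sales Model v2")
```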