From a796339e267ed119ed6495d723b054e2263d7147 Mon Sep 17 00:00:00 2001 From: m-kovalsky Date: Wed, 30 Oct 2024 15:27:22 +0100 Subject: [PATCH] cleaned up notebooks (#240) --- notebooks/Model Optimization.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/Model Optimization.ipynb b/notebooks/Model Optimization.ipynb index 45ab4d00..f5ea91c1 100644 --- a/notebooks/Model Optimization.ipynb +++ b/notebooks/Model Optimization.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip install semantic-link-labs"]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs import lakehouse as lake\n","from sempy_labs import directlake\n","import sempy_labs.report as rep\n","\n","dataset_name = ''\n","workspace_name = None"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your 
lakehouse"]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = dataset_name, workspace = workspace_name, export = 'zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset = dataset_name, workspace = workspace_name, export = 'table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analzyer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path = '', file_name = '')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analzyer\n","\n","This runs the [standard rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the Best Practice Analyzer and exports the results to the 'modelbparesults' delta table in your Fabric lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = dataset_name, workspace = workspace_name, export = True)"]},{"cell_type":"markdown","id":"64968a31","metadata":{},"source":["This runs the Best Practice Analyzer with the rules translated into 
Italian."]},{"cell_type":"code","execution_count":null,"id":"3c7d89e2","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset = dataset_name, workspace = workspace_name, language = 'it-IT')"]},{"cell_type":"markdown","id":"255c30bb","metadata":{},"source":["
\n","Note: For analyzing model BPA results at scale, see the Best Practice Analyzer Report notebook (link below).\n","
\n","\n","[Best Practice Analyzer Notebook](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Best%20Practice%20Analyzer%20Report.ipynb)"]},{"cell_type":"markdown","id":"bab18a61","metadata":{},"source":["### Run BPA using your own best practice rules"]},{"cell_type":"code","execution_count":null,"id":"59b89387","metadata":{},"outputs":[],"source":["import sempy\n","sempy.fabric._client._utils._init_analysis_services()\n","import Microsoft.AnalysisServices.Tabular as TOM\n","import pandas as pd\n","\n","dataset_name = ''\n","workspace_name = ''\n","\n","rules = pd.DataFrame(\n"," [\n"," (\n"," \"Performance\",\n"," \"Table\",\n"," \"Warning\",\n"," \"Rule name...\",\n"," lambda obj, tom: tom.is_calculated_table(table_name=obj.Name),\n"," 'Rule description...',\n"," '',\n"," ),\n"," (\n"," \"Performance\",\n"," \"Column\",\n"," \"Warning\",\n"," \"Do not use floating point data types\",\n"," lambda obj, tom: obj.DataType == TOM.DataType.Double,\n"," 'The \"Double\" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. 
Use \"Int64\" or \"Decimal\" where appropriate (but note that \"Decimal\" is limited to 4 digits after the decimal sign).',\n"," )\n"," ],\n"," columns=[\n"," \"Category\",\n"," \"Scope\",\n"," \"Severity\",\n"," \"Rule Name\",\n"," \"Expression\",\n"," \"Description\",\n"," \"URL\",\n"," ],\n",")\n","\n","labs.run_model_bpa(dataset=dataset_name, workspace=workspace_name, rules=rules)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse = None, workspace = None, extended = True, count_rows = False, export = True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["directlake.check_fallback_reason(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["lake.optimize_lakehouse_tables(tables = ['', ''], lakehouse = None, workspace = None)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the 
refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_isresident(dataset = dataset_name, workspace = workspace_name)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_perspective(dataset = dataset_name, workspace = workspace_name, perspective = '', add_dependencies = True)"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5} +{"cells":[{"cell_type":"markdown","id":"5c27dfd1-4fe0-4a97-92e6-ddf78889aa93","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Install the latest .whl package\n","\n","Check [here](https://pypi.org/project/semantic-link-labs/) to see the latest version."]},{"cell_type":"code","execution_count":null,"id":"d5cae9db-cef9-48a8-a351-9c5fcc99645c","metadata":{"jupyter":{"outputs_hidden":true,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["%pip 
install semantic-link-labs"]},{"cell_type":"markdown","id":"cd8de5a0","metadata":{},"source":["### Import the library"]},{"cell_type":"code","execution_count":null,"id":"5cc6eedf","metadata":{},"outputs":[],"source":["import sempy_labs as labs\n","from sempy_labs import lakehouse as lake\n","from sempy_labs import directlake\n","import sempy_labs.report as rep\n","\n","dataset_name = ''\n","workspace_name = None"]},{"cell_type":"markdown","id":"5a3fe6e8-b8aa-4447-812b-7931831e07fe","metadata":{"nteract":{"transient":{"deleting":false}}},"source":["### Vertipaq Analyzer"]},{"cell_type":"code","execution_count":null,"id":"cde43b47-4ecc-46ae-9125-9674819c7eab","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.vertipaq_analyzer(dataset=dataset_name, workspace=workspace_name)"]},{"cell_type":"markdown","id":"419a348f","metadata":{},"source":["Export the Vertipaq Analyzer results to a .zip file in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"8aa239b3","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset=dataset_name, workspace=workspace_name, export='zip')"]},{"cell_type":"markdown","id":"2dce0f4f","metadata":{},"source":["Export the Vertipaq Analyzer results to append to delta tables in your lakehouse."]},{"cell_type":"code","execution_count":null,"id":"aef93fc8","metadata":{},"outputs":[],"source":["labs.vertipaq_analyzer(dataset=dataset_name, workspace=workspace_name, export='table')"]},{"cell_type":"markdown","id":"1c62a802","metadata":{},"source":["Visualize the contents of an exported Vertipaq Analyzer .zip file."]},{"cell_type":"code","execution_count":null,"id":"9e349954","metadata":{},"outputs":[],"source":["labs.import_vertipaq_analyzer(folder_path='', file_name='')"]},{"cell_type":"markdown","id":"456ce0ff","metadata":{},"source":["### Best Practice Analyzer\n","\n","This runs the [standard 
rules](https://github.com/microsoft/Analysis-Services/tree/master/BestPracticeRules) for semantic models posted on Microsoft's GitHub."]},{"cell_type":"code","execution_count":null,"id":"0a3616b5-566e-414e-a225-fb850d6418dc","metadata":{"jupyter":{"outputs_hidden":false,"source_hidden":false},"nteract":{"transient":{"deleting":false}}},"outputs":[],"source":["labs.run_model_bpa(dataset=dataset_name, workspace=workspace_name)"]},{"cell_type":"markdown","id":"6fb32a58","metadata":{},"source":["This runs the Best Practice Analyzer and exports the results to the 'modelbparesults' delta table in your Fabric lakehouse."]},{"cell_type":"code","execution_count":null,"id":"677851c3","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset=dataset_name, workspace=workspace_name, export=True)"]},{"cell_type":"markdown","id":"64968a31","metadata":{},"source":["This runs the Best Practice Analyzer with the rules translated into Italian."]},{"cell_type":"code","execution_count":null,"id":"3c7d89e2","metadata":{},"outputs":[],"source":["labs.run_model_bpa(dataset=dataset_name, workspace=workspace_name, language ='italian')"]},{"cell_type":"markdown","id":"255c30bb","metadata":{},"source":["
\n","Note: For analyzing model BPA results at scale, see the Best Practice Analyzer Report notebook (link below).\n","
\n","\n","[Best Practice Analyzer Notebook](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Best%20Practice%20Analyzer%20Report.ipynb)"]},{"cell_type":"markdown","id":"bab18a61","metadata":{},"source":["### Run BPA using your own best practice rules"]},{"cell_type":"code","execution_count":null,"id":"59b89387","metadata":{},"outputs":[],"source":["import sempy\n","sempy.fabric._client._utils._init_analysis_services()\n","import Microsoft.AnalysisServices.Tabular as TOM\n","import pandas as pd\n","\n","dataset_name = ''\n","workspace_name = ''\n","\n","rules = pd.DataFrame(\n"," [\n"," (\n"," \"Performance\",\n"," \"Table\",\n"," \"Warning\",\n"," \"Rule name...\",\n"," lambda obj, tom: tom.is_calculated_table(table_name=obj.Name),\n"," 'Rule description...',\n"," '',\n"," ),\n"," (\n"," \"Performance\",\n"," \"Column\",\n"," \"Warning\",\n"," \"Do not use floating point data types\",\n"," lambda obj, tom: obj.DataType == TOM.DataType.Double,\n"," 'The \"Double\" floating point data type should be avoided, as it can result in unpredictable roundoff errors and decreased performance in certain scenarios. 
Use \"Int64\" or \"Decimal\" where appropriate (but note that \"Decimal\" is limited to 4 digits after the decimal sign).',\n"," )\n"," ],\n"," columns=[\n"," \"Category\",\n"," \"Scope\",\n"," \"Severity\",\n"," \"Rule Name\",\n"," \"Expression\",\n"," \"Description\",\n"," \"URL\",\n"," ],\n",")\n","\n","labs.run_model_bpa(dataset=dataset_name, workspace=workspace_name, rules=rules)"]},{"cell_type":"markdown","id":"8126a1a1","metadata":{},"source":["### Direct Lake\n","\n","Check if any lakehouse tables will hit the [Direct Lake guardrails](https://learn.microsoft.com/power-bi/enterprise/directlake-overview#fallback)."]},{"cell_type":"code","execution_count":null,"id":"e7397b15","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse=None, workspace=None, extended=True, count_rows=False)"]},{"cell_type":"code","execution_count":null,"id":"b30074cf","metadata":{},"outputs":[],"source":["lake.get_lakehouse_tables(lakehouse=None, workspace=None, extended=True, count_rows=False, export=True)"]},{"cell_type":"markdown","id":"99b84f2b","metadata":{},"source":["Check if any tables in a Direct Lake semantic model will fall back to DirectQuery."]},{"cell_type":"code","execution_count":null,"id":"f837be58","metadata":{},"outputs":[],"source":["directlake.check_fallback_reason(dataset=dataset_name, workspace=workspace_name)"]},{"cell_type":"markdown","id":"8f6df93e","metadata":{},"source":["### [OPTIMIZE](https://docs.delta.io/latest/optimizations-oss.html) your lakehouse delta tables."]},{"cell_type":"code","execution_count":null,"id":"e0262c9e","metadata":{},"outputs":[],"source":["lake.optimize_lakehouse_tables(tables=['', ''], lakehouse=None, workspace=workspace_name)"]},{"cell_type":"markdown","id":"0091d6a0","metadata":{},"source":["Refresh/reframe your Direct Lake semantic model and restore the columns which were in memory prior to the 
refresh."]},{"cell_type":"code","execution_count":null,"id":"77eef082","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_isresident(dataset=dataset_name, workspace=workspace_name)"]},{"cell_type":"markdown","id":"dae1a210","metadata":{},"source":["Ensure a warm cache for your users by putting the columns of a Direct Lake semantic model into memory based on the contents of a [perspective](https://learn.microsoft.com/analysis-services/tabular-models/perspectives-ssas-tabular?view=asallproducts-allversions).\n","\n","Perspectives can be created either in [Tabular Editor 3](https://github.com/TabularEditor/TabularEditor3/releases/latest) or in [Tabular Editor 2](https://github.com/TabularEditor/TabularEditor/releases/latest) using the [Perspective Editor](https://www.elegantbi.com/post/perspectiveeditor)."]},{"cell_type":"code","execution_count":null,"id":"43297001","metadata":{},"outputs":[],"source":["directlake.warm_direct_lake_cache_perspective(\n"," dataset=dataset_name,\n"," workspace=workspace_name,\n"," perspective='',\n"," add_dependencies=True\n",")"]}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"display_name":"Synapse PySpark","language":"Python","name":"synapse_pyspark"},"language_info":{"name":"python"},"microsoft":{"language":"python"},"nteract":{"version":"nteract-front-end@1.0.0"},"spark_compute":{"compute_id":"/trident/default"},"synapse_widget":{"state":{},"version":"0.1"},"widgets":{}},"nbformat":4,"nbformat_minor":5}