From 9b4f2f19e62f04bda94f68007a30bcdfea3f7561 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 11 Nov 2024 09:45:26 +0200 Subject: [PATCH] updates for issue 259 --- src/sempy_labs/_model_bpa.py | 386 ++++++++++++++++-------------- src/sempy_labs/_model_bpa_bulk.py | 13 +- 2 files changed, 217 insertions(+), 182 deletions(-) diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index bdb221e3..5af34d74 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -123,210 +123,236 @@ def map_language(language, language_list): dataset=dataset, workspace=workspace, readonly=True ) as tom: - dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace) - - def translate_using_po(rule_file): - current_dir = os.path.dirname(os.path.abspath(__file__)) - translation_file = ( - f"{current_dir}/_bpa_translation/_model/_translations_{language}.po" + # Do not run BPA for models with no tables + if tom.model.Tables.Count == 0: + finalDF = pd.DataFrame( + columns=[ + "Category", + "Rule Name", + "Severity", + "Object Type", + "Object Name", + "Description", + "URL", + ] ) - for c in ["Category", "Description", "Rule Name"]: - po = polib.pofile(translation_file) - for entry in po: - if entry.tcomment == c.lower().replace(" ", "_"): - rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = ( - entry.msgstr - ) + else: + dep = get_model_calc_dependencies(dataset=dataset, workspace=workspace) - translated = False + def translate_using_po(rule_file): + current_dir = os.path.dirname(os.path.abspath(__file__)) + translation_file = ( + f"{current_dir}/_bpa_translation/_model/_translations_{language}.po" + ) + for c in ["Category", "Description", "Rule Name"]: + po = polib.pofile(translation_file) + for entry in po: + if entry.tcomment == c.lower().replace(" ", "_"): + rule_file.loc[rule_file["Rule Name"] == entry.msgid, c] = ( + entry.msgstr + ) - # Translations - if language is not None and rules is None and language in language_list: - 
rules = model_bpa_rules(dependencies=dep) - translate_using_po(rules) - translated = True - if rules is None: - rules = model_bpa_rules(dependencies=dep) - if language is not None and not translated: + translated = False - def translate_using_spark(rule_file): + # Translations + if language is not None and rules is None and language in language_list: + rules = model_bpa_rules(dependencies=dep) + translate_using_po(rules) + translated = True + if rules is None: + rules = model_bpa_rules(dependencies=dep) + if language is not None and not translated: - from synapse.ml.services import Translate - from pyspark.sql import SparkSession + def translate_using_spark(rule_file): - rules_temp = rule_file.copy() - rules_temp = rules_temp.drop(["Expression", "URL", "Severity"], axis=1) + from synapse.ml.services import Translate + from pyspark.sql import SparkSession - schema = StructType( - [ - StructField("Category", StringType(), True), - StructField("Scope", StringType(), True), - StructField("Rule Name", StringType(), True), - StructField("Description", StringType(), True), - ] - ) + rules_temp = rule_file.copy() + rules_temp = rules_temp.drop( + ["Expression", "URL", "Severity"], axis=1 + ) - spark = SparkSession.builder.getOrCreate() - dfRules = spark.createDataFrame(rules_temp, schema) - - columns = ["Category", "Rule Name", "Description"] - for clm in columns: - translate = ( - Translate() - .setTextCol(clm) - .setToLanguage(language) - .setOutputCol("translation") - .setConcurrency(5) + schema = StructType( + [ + StructField("Category", StringType(), True), + StructField("Scope", StringType(), True), + StructField("Rule Name", StringType(), True), + StructField("Description", StringType(), True), + ] ) - if clm == "Rule Name": - transDF = ( - translate.transform(dfRules) - .withColumn( - "translation", flatten(col("translation.translations")) - ) - .withColumn("translation", col("translation.text")) - .select(clm, "translation") + spark = 
SparkSession.builder.getOrCreate() + dfRules = spark.createDataFrame(rules_temp, schema) + + columns = ["Category", "Rule Name", "Description"] + for clm in columns: + translate = ( + Translate() + .setTextCol(clm) + .setToLanguage(language) + .setOutputCol("translation") + .setConcurrency(5) ) - else: - transDF = ( - translate.transform(dfRules) - .withColumn( - "translation", flatten(col("translation.translations")) + + if clm == "Rule Name": + transDF = ( + translate.transform(dfRules) + .withColumn( + "translation", + flatten(col("translation.translations")), + ) + .withColumn("translation", col("translation.text")) + .select(clm, "translation") + ) + else: + transDF = ( + translate.transform(dfRules) + .withColumn( + "translation", + flatten(col("translation.translations")), + ) + .withColumn("translation", col("translation.text")) + .select("Rule Name", clm, "translation") ) - .withColumn("translation", col("translation.text")) - .select("Rule Name", clm, "translation") - ) - df_panda = transDF.toPandas() - rule_file = pd.merge( - rule_file, - df_panda[["Rule Name", "translation"]], - on="Rule Name", - how="left", - ) + df_panda = transDF.toPandas() + rule_file = pd.merge( + rule_file, + df_panda[["Rule Name", "translation"]], + on="Rule Name", + how="left", + ) - rule_file = rule_file.rename( - columns={"translation": f"{clm}Translated"} - ) - rule_file[f"{clm}Translated"] = rule_file[f"{clm}Translated"].apply( - lambda x: x[0] if x is not None else None - ) + rule_file = rule_file.rename( + columns={"translation": f"{clm}Translated"} + ) + rule_file[f"{clm}Translated"] = rule_file[ + f"{clm}Translated" + ].apply(lambda x: x[0] if x is not None else None) - for clm in columns: - rule_file = rule_file.drop([clm], axis=1) - rule_file = rule_file.rename(columns={f"{clm}Translated": clm}) + for clm in columns: + rule_file = rule_file.drop([clm], axis=1) + rule_file = rule_file.rename(columns={f"{clm}Translated": clm}) - return rule_file + return rule_file - 
rules = translate_using_spark(rules) + rules = translate_using_spark(rules) - rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning - rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error - rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info + rules.loc[rules["Severity"] == "Warning", "Severity"] = icons.warning + rules.loc[rules["Severity"] == "Error", "Severity"] = icons.error + rules.loc[rules["Severity"] == "Info", "Severity"] = icons.info - pd.set_option("display.max_colwidth", 1000) + pd.set_option("display.max_colwidth", 1000) - violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"]) + violations = pd.DataFrame(columns=["Object Name", "Scope", "Rule Name"]) - scope_to_dataframe = { - "Relationship": ( - tom.model.Relationships, - lambda obj: create_relationship_name( - obj.FromTable.Name, - obj.FromColumn.Name, - obj.ToTable.Name, - obj.ToColumn.Name, + scope_to_dataframe = { + "Relationship": ( + tom.model.Relationships, + lambda obj: create_relationship_name( + obj.FromTable.Name, + obj.FromColumn.Name, + obj.ToTable.Name, + obj.ToColumn.Name, + ), ), - ), - "Column": ( - tom.all_columns(), - lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), - ), - "Measure": (tom.all_measures(), lambda obj: obj.Name), - "Hierarchy": ( - tom.all_hierarchies(), - lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), - ), - "Table": (tom.model.Tables, lambda obj: obj.Name), - "Role": (tom.model.Roles, lambda obj: obj.Name), - "Model": (tom.model, lambda obj: obj.Model.Name), - "Calculation Item": ( - tom.all_calculation_items(), - lambda obj: format_dax_object_name(obj.Parent.Table.Name, obj.Name), - ), - "Row Level Security": ( - tom.all_rls(), - lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), - ), - "Partition": ( - tom.all_partitions(), - lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), - ), - } - - for i, r in rules.iterrows(): - ruleName = r["Rule Name"] - expr = 
r["Expression"] - scopes = r["Scope"] - - if isinstance(scopes, str): - scopes = [scopes] - - for scope in scopes: - func = scope_to_dataframe[scope][0] - nm = scope_to_dataframe[scope][1] - - if scope == "Model": - x = [] - if expr(func, tom): - x = ["Model"] - elif scope == "Measure": - x = [nm(obj) for obj in tom.all_measures() if expr(obj, tom)] - elif scope == "Column": - x = [nm(obj) for obj in tom.all_columns() if expr(obj, tom)] - elif scope == "Partition": - x = [nm(obj) for obj in tom.all_partitions() if expr(obj, tom)] - elif scope == "Hierarchy": - x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj, tom)] - elif scope == "Table": - x = [nm(obj) for obj in tom.model.Tables if expr(obj, tom)] - elif scope == "Relationship": - x = [nm(obj) for obj in tom.model.Relationships if expr(obj, tom)] - elif scope == "Role": - x = [nm(obj) for obj in tom.model.Roles if expr(obj, tom)] - elif scope == "Row Level Security": - x = [nm(obj) for obj in tom.all_rls() if expr(obj, tom)] - elif scope == "Calculation Item": - x = [ - nm(obj) for obj in tom.all_calculation_items() if expr(obj, tom) - ] - - if len(x) > 0: - new_data = {"Object Name": x, "Scope": scope, "Rule Name": ruleName} - violations = pd.concat( - [violations, pd.DataFrame(new_data)], ignore_index=True - ) + "Column": ( + tom.all_columns(), + lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), + ), + "Measure": (tom.all_measures(), lambda obj: obj.Name), + "Hierarchy": ( + tom.all_hierarchies(), + lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), + ), + "Table": (tom.model.Tables, lambda obj: obj.Name), + "Role": (tom.model.Roles, lambda obj: obj.Name), + "Model": (tom.model, lambda obj: obj.Model.Name), + "Calculation Item": ( + tom.all_calculation_items(), + lambda obj: format_dax_object_name(obj.Parent.Table.Name, obj.Name), + ), + "Row Level Security": ( + tom.all_rls(), + lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), + ), + "Partition": ( + 
tom.all_partitions(), + lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), + ), + } + + for i, r in rules.iterrows(): + ruleName = r["Rule Name"] + expr = r["Expression"] + scopes = r["Scope"] + + if isinstance(scopes, str): + scopes = [scopes] + + for scope in scopes: + func = scope_to_dataframe[scope][0] + nm = scope_to_dataframe[scope][1] + + if scope == "Model": + x = [] + if expr(func, tom): + x = ["Model"] + elif scope == "Measure": + x = [nm(obj) for obj in tom.all_measures() if expr(obj, tom)] + elif scope == "Column": + x = [nm(obj) for obj in tom.all_columns() if expr(obj, tom)] + elif scope == "Partition": + x = [nm(obj) for obj in tom.all_partitions() if expr(obj, tom)] + elif scope == "Hierarchy": + x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj, tom)] + elif scope == "Table": + x = [nm(obj) for obj in tom.model.Tables if expr(obj, tom)] + elif scope == "Relationship": + x = [ + nm(obj) for obj in tom.model.Relationships if expr(obj, tom) + ] + elif scope == "Role": + x = [nm(obj) for obj in tom.model.Roles if expr(obj, tom)] + elif scope == "Row Level Security": + x = [nm(obj) for obj in tom.all_rls() if expr(obj, tom)] + elif scope == "Calculation Item": + x = [ + nm(obj) + for obj in tom.all_calculation_items() + if expr(obj, tom) + ] + + if len(x) > 0: + new_data = { + "Object Name": x, + "Scope": scope, + "Rule Name": ruleName, + } + violations = pd.concat( + [violations, pd.DataFrame(new_data)], ignore_index=True + ) - prepDF = pd.merge( - violations, - rules[["Rule Name", "Category", "Severity", "Description", "URL"]], - left_on="Rule Name", - right_on="Rule Name", - how="left", - ) - prepDF.rename(columns={"Scope": "Object Type"}, inplace=True) - finalDF = prepDF[ - [ - "Category", - "Rule Name", - "Severity", - "Object Type", - "Object Name", - "Description", - "URL", + prepDF = pd.merge( + violations, + rules[["Rule Name", "Category", "Severity", "Description", "URL"]], + left_on="Rule Name", + right_on="Rule Name", + 
how="left", + ) + prepDF.rename(columns={"Scope": "Object Type"}, inplace=True) + finalDF = prepDF[ + [ + "Category", + "Rule Name", + "Severity", + "Object Type", + "Object Name", + "Description", + "URL", + ] ] - ] if export: if not lakehouse_attached(): diff --git a/src/sempy_labs/_model_bpa_bulk.py b/src/sempy_labs/_model_bpa_bulk.py index 803ad5af..233902f4 100644 --- a/src/sempy_labs/_model_bpa_bulk.py +++ b/src/sempy_labs/_model_bpa_bulk.py @@ -25,6 +25,7 @@ def run_model_bpa_bulk( language: Optional[str] = None, workspace: Optional[str | List[str]] = None, skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"], + skip_models_in_workspace: Optional[dict] = None, ): """ Runs the semantic model Best Practice Analyzer across all semantic models in a workspace (or all accessible workspaces). @@ -33,8 +34,6 @@ def run_model_bpa_bulk( Parameters ---------- - dataset : str - Name of the semantic model. rules : pandas.DataFrame, default=None A pandas dataframe containing rules to be evaluated. Based on the format of the dataframe produced by the model_bpa_rules function. extended : bool, default=False @@ -47,6 +46,12 @@ def run_model_bpa_bulk( Defaults to None which scans all accessible workspaces. skip_models : str | List[str], default=['ModelBPA', 'Fabric Capacity Metrics'] The semantic models to always skip when running this analysis. + skip_models_in_workspace : dict, default=None + A dictionary showing specific semantic models within specific workspaces to skip. 
See the example below:
+        {
+            "Workspace A": ["Dataset1", "Dataset2"],
+            "Workspace B": ["Dataset5", "Dataset 8"],
+        }
     """
 
     if not lakehouse_attached():
@@ -91,6 +96,10 @@ def run_model_bpa_bulk(
         df = pd.DataFrame(columns=list(icons.bpa_schema.keys()))
         dfD = fabric.list_datasets(workspace=wksp, mode="rest")
 
+        # Skip per-workspace models; the dict may be None or lack this workspace
+        skip_models_wkspc = (skip_models_in_workspace or {}).get(wksp) or []
+        dfD = dfD[~dfD["Dataset Name"].isin(skip_models_wkspc)]
+
         # Exclude default semantic models
         if len(dfD) > 0:
             dfI = fabric.list_items(workspace=wksp)