From 9a9b03856c27b88c388469c10d5f6a18912e6887 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Fri, 30 Aug 2024 22:50:13 +0200 Subject: [PATCH 01/17] Saving approach --- dbt-bouncer-example.yml | 292 +++++++++--------- .../checks/run_results/check_run_results.py | 145 ++++----- src/dbt_bouncer/config_file_validator.py | 1 + 3 files changed, 221 insertions(+), 217 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 620bf448..f3c81c04 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -1,159 +1,159 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artifacts exists, generally the `target` directory inside a dbt project. Defaults to `./target`. -# exclude: ^models/staging # Optional, here for demonstration purposes only -# include: ^models/marts # Optional, here for demonstration purposes only -# severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. +# # exclude: ^models/staging # Optional, here for demonstration purposes only +# # include: ^models/marts # Optional, here for demonstration purposes only +# # severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. -catalog_checks: - - name: check_column_description_populated - include: ^models/marts - - name: check_column_name_complies_to_column_type - column_name_pattern: ^is_.* - exclude: ^models/staging - types: - - BOOLEAN - - name: check_column_name_complies_to_column_type - column_name_pattern: .*_date$ - include: ^models/staging # Not a good idea, here for demonstration purposes only - types: - - DATE - - name: check_column_name_complies_to_column_type - column_name_pattern: ^[a-z_]*$ - types: - - BIGINT - - BOOLEAN - - DATE - - DOUBLE - - INTEGER - - VARCHAR - - name: check_column_has_specified_test - column_name_pattern: ^is_.* - test_name: not_null - - name: check_columns_are_all_documented - include: ^models/marts - - name: check_columns_are_documented_in_public_models - - name: check_source_columns_are_all_documented - exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only +# catalog_checks: +# - name: check_column_description_populated +# include: ^models/marts +# - name: check_column_name_complies_to_column_type +# column_name_pattern: ^is_.* +# exclude: ^models/staging +# types: +# - BOOLEAN +# - name: check_column_name_complies_to_column_type +# column_name_pattern: .*_date$ +# include: ^models/staging # Not a good idea, here for demonstration purposes only +# types: +# - DATE +# - name: check_column_name_complies_to_column_type +# column_name_pattern: ^[a-z_]*$ +# types: +# - BIGINT +# - BOOLEAN +# - DATE +# - DOUBLE +# - INTEGER +# - VARCHAR +# - name: check_column_has_specified_test +# column_name_pattern: ^is_.* +# test_name: not_null +# - name: check_columns_are_all_documented +# include: ^models/marts +# - name: check_columns_are_documented_in_public_models +# - name: check_source_columns_are_all_documented +# exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only -manifest_checks: - # Exposures - - name: check_exposure_based_on_non_public_models - - name: check_exposure_based_on_view +# manifest_checks: +# # Exposures +# - name: check_exposure_based_on_non_public_models +# - name: check_exposure_based_on_view - # Lineage - - name: check_lineage_permitted_upstream_models - include: ^models/staging - upstream_path_pattern: $^ - - name: check_lineage_permitted_upstream_models - include: ^models/intermediate - upstream_path_pattern: ^models/staging|^models/intermediate - - name: check_lineage_permitted_upstream_models - include: ^models/marts - upstream_path_pattern: ^models/staging|^models/intermediate - - name: check_lineage_seed_cannot_be_used - include: ^models/intermediate|^models/marts - - name: check_lineage_source_cannot_be_used - include: ^models/intermediate|^models/marts +# # Lineage +# - name: check_lineage_permitted_upstream_models +# include: ^models/staging +# upstream_path_pattern: $^ +# - name: check_lineage_permitted_upstream_models +# include: ^models/intermediate +# upstream_path_pattern: ^models/staging|^models/intermediate +# - name: check_lineage_permitted_upstream_models +# include: ^models/marts +# upstream_path_pattern: ^models/staging|^models/intermediate +# - name: check_lineage_seed_cannot_be_used +# include: ^models/intermediate|^models/marts +# - name: check_lineage_source_cannot_be_used +# include: ^models/intermediate|^models/marts - # Macros - - name: check_macro_arguments_description_populated - - name: check_macro_code_does_not_contain_regexp_pattern - regexp_pattern: .*[i][f][n][u][l][l].* - - name: check_macro_description_populated - - name: check_macro_max_number_of_lines - - name: check_macro_name_matches_file_name - - name: check_macro_property_file_location +# # Macros +# - name: check_macro_arguments_description_populated +# - name: check_macro_code_does_not_contain_regexp_pattern +# regexp_pattern: .*[i][f][n][u][l][l].* +# - name: check_macro_description_populated +# - name: check_macro_max_number_of_lines +# - name: check_macro_name_matches_file_name +# - name: check_macro_property_file_location - # Metadata - - name: check_project_name - project_name_pattern: ^dbt_bouncer_ +# # Metadata +# - name: check_project_name +# project_name_pattern: ^dbt_bouncer_ - # Models - - name: check_model_access - include: ^models/intermediate - access: protected - severity: warn - - name: check_model_access - include: ^models/marts - access: public - - name: check_model_access - include: ^models/staging - access: protected - - name: check_model_code_does_not_contain_regexp_pattern - regexp_pattern: .*[i][f][n][u][l][l].* - - name: check_model_contract_enforced_for_public_model - - name: check_model_depends_on_multiple_sources - - name: check_model_description_populated - - name: check_model_directories - include: ^models - permitted_sub_directories: - - intermediate - - marts - - staging - - utilities - - name: check_model_directories - include: ^models/staging - permitted_sub_directories: - - crm - - payments - - name: check_model_documentation_coverage - - name: check_model_documented_in_same_directory - - name: check_model_has_contracts_enforced - include: ^models/marts - - name: check_model_has_meta_keys - keys: - - maturity - - name: check_model_has_no_upstream_dependencies - include: ^((?!int_*).)*$ - exclude: ^models/utilities/time_spines - - name: check_model_has_tags - include: ^models/staging/crm - tags: - - crm - - name: check_model_has_unique_test - - name: check_model_max_chained_views - - name: check_model_max_fanout - max_downstream_models: 2 - - name: check_model_max_number_of_lines - - name: check_model_max_upstream_dependencies - - name: check_model_has_unit_tests - include: ^models/marts - - name: check_model_names - include: ^models/intermediate - model_name_pattern: ^int_ - - name: check_model_names - include: ^models/staging - model_name_pattern: ^stg_ - - name: check_model_test_coverage - - name: check_model_property_file_location +# # Models +# - name: check_model_access +# include: ^models/intermediate +# access: protected +# severity: warn +# - name: check_model_access +# include: ^models/marts +# access: public +# - name: check_model_access +# include: ^models/staging +# access: protected +# - name: check_model_code_does_not_contain_regexp_pattern +# regexp_pattern: .*[i][f][n][u][l][l].* +# - name: check_model_contract_enforced_for_public_model +# - name: check_model_depends_on_multiple_sources +# - name: check_model_description_populated +# - name: check_model_directories +# include: ^models +# permitted_sub_directories: +# - intermediate +# - marts +# - staging +# - utilities +# - name: check_model_directories +# include: ^models/staging +# permitted_sub_directories: +# - crm +# - payments +# - name: check_model_documentation_coverage +# - name: check_model_documented_in_same_directory +# - name: check_model_has_contracts_enforced +# include: ^models/marts +# - name: check_model_has_meta_keys +# keys: +# - maturity +# - name: check_model_has_no_upstream_dependencies +# include: ^((?!int_*).)*$ +# exclude: ^models/utilities/time_spines +# - name: check_model_has_tags +# include: ^models/staging/crm +# tags: +# - crm +# - name: check_model_has_unique_test +# - name: check_model_max_chained_views +# - name: check_model_max_fanout +# max_downstream_models: 2 +# - name: check_model_max_number_of_lines +# - name: check_model_max_upstream_dependencies +# - name: check_model_has_unit_tests +# include: ^models/marts +# - name: check_model_names +# include: ^models/intermediate +# model_name_pattern: ^int_ +# - name: check_model_names +# include: ^models/staging +# model_name_pattern: ^stg_ +# - name: check_model_test_coverage +# - name: check_model_property_file_location - # Semantic models - - name: check_semantic_model_based_on_non_public_models +# # Semantic models +# - name: check_semantic_model_based_on_non_public_models - # Sources - - name: check_source_description_populated - - name: check_source_freshness_populated - - name: check_source_loader_populated - - name: check_source_has_meta_keys - keys: - - contact: - - email - - slack - - owner - - name: check_source_has_tags - tags: - - example_tag - - name: check_source_names - source_name_pattern: > - ^[a-z0-9_]*$ - - name: check_source_not_orphaned - include: ^models/staging/crm - - name: check_source_property_file_location - - name: check_source_used_by_models_in_same_directory - - name: check_source_used_by_only_one_model +# # Sources +# - name: check_source_description_populated +# - name: check_source_freshness_populated +# - name: check_source_loader_populated +# - name: check_source_has_meta_keys +# keys: +# - contact: +# - email +# - slack +# - owner +# - name: check_source_has_tags +# tags: +# - example_tag +# - name: check_source_names +# source_name_pattern: > +# ^[a-z0-9_]*$ +# - name: check_source_not_orphaned +# include: ^models/staging/crm +# - name: check_source_property_file_location +# - name: check_source_used_by_models_in_same_directory +# - name: check_source_used_by_only_one_model - # Unit tests - - name: check_unit_test_expect_format - - name: check_unit_test_given_formats +# # Unit tests +# - name: check_unit_test_expect_format +# - name: check_unit_test_given_formats run_results_checks: # Not a good idea, here for demonstration purposes only diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index e16a17d4..b552fdbb 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -6,91 +6,94 @@ if TYPE_CHECKING: from dbt_bouncer.parsers import DbtBouncerRunResult + # from dbt_artifacts_parser.parsers.run_results.run_results_v6 import Result -class CheckRunResultsMaxGigabytesBilled(BaseCheck): - max_gigabytes_billed: float - name: Literal["check_run_results_max_gigabytes_billed"] +# class CheckRunResultsMaxGigabytesBilled(BaseCheck): +# max_gigabytes_billed: float +# name: Literal["check_run_results_max_gigabytes_billed"] -def check_run_results_max_gigabytes_billed( - max_gigabytes_billed: float, - run_result: "DbtBouncerRunResult", - **kwargs, -) -> None: - """Each result can have a maximum number of gigabytes billed. +# def check_run_results_max_gigabytes_billed( +# max_gigabytes_billed: float, +# run_result: "DbtBouncerRunResult", +# **kwargs, +# ) -> None: +# """Each result can have a maximum number of gigabytes billed. - !!! note +# !!! note - Note that this check only works for the `dbt-bigquery` adapter. +# Note that this check only works for the `dbt-bigquery` adapter. - Parameters: - max_gigabytes_billed (float): The maximum number of gigabytes billed. - run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. +# Parameters: +# max_gigabytes_billed (float): The maximum number of gigabytes billed. +# run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. - Other Parameters: - exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. - include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. - severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. +# Other Parameters: +# exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. +# include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. +# severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - Raises: # noqa:DOC502 - KeyError: If the `dbt-bigquery` adapter is not used. +# Raises: # noqa:DOC502 +# KeyError: If the `dbt-bigquery` adapter is not used. - Example(s): - ```yaml - run_results_checks: - - name: check_run_results_max_gigabytes_billed - max_gigabytes_billed: 100 - ``` +# Example(s): +# ```yaml +# run_results_checks: +# - name: check_run_results_max_gigabytes_billed +# max_gigabytes_billed: 100 +# ``` - """ - try: - gigabytes_billed = run_result.adapter_response["bytes_billed"] / (1000**3) - except KeyError as e: - raise RuntimeError( # noqa: DOC501 - "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", - ) from e +# """ +# try: +# gigabytes_billed = run_result.adapter_response["bytes_billed"] / (1000**3) +# except KeyError as e: +# raise RuntimeError( # noqa: DOC501 +# "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", +# ) from e - assert ( - gigabytes_billed < max_gigabytes_billed - ), f"`{run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({max_gigabytes_billed})." +# assert ( +# gigabytes_billed < max_gigabytes_billed +# ), f"`{run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({max_gigabytes_billed})." + + +import logging class CheckRunResultsMaxExecutionTime(BaseCheck): max_execution_time_seconds: float name: Literal["check_run_results_max_execution_time"] - - -def check_run_results_max_execution_time( - max_execution_time_seconds: float, - run_result: "DbtBouncerRunResult", - **kwargs, -) -> None: - """Each result can take a maximum duration (seconds). - - Parameters: - max_execution_time_seconds (float): The maximum execution time (seconds) allowed for a node. - run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. - - Other Parameters: - exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. - include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. - severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - - Example(s): - ```yaml - run_results_checks: - - name: check_run_results_max_execution_time - max_execution_time_seconds: 60 - ``` - ```yaml - run_results_checks: - - name: check_run_results_max_execution_time - include: ^models/staging # Not a good idea, here for demonstration purposes only - max_execution_time_seconds: 10 - ``` - - """ - assert ( - run_result.execution_time <= max_execution_time_seconds - ), f"`{run_result.unique_id.split('.')[-1]}` has an execution time ({run_result.execution_time} greater than permitted ({max_execution_time_seconds}s)." + run_result: "DbtBouncerRunResult" + + def execute(self) -> None: + """Each result can take a maximum duration (seconds). + + Parameters: + max_execution_time_seconds (float): The maximum execution time (seconds) allowed for a node. + run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. + + Other Parameters: + exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. + include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. + severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. + + Example(s): + ```yaml + run_results_checks: + - name: check_run_results_max_execution_time + max_execution_time_seconds: 60 + ``` + ```yaml + run_results_checks: + - name: check_run_results_max_execution_time + include: ^models/staging # Not a good idea, here for demonstration purposes only + max_execution_time_seconds: 10 + ``` + + """ + + logging.error("Executing check...") + + assert ( + self.run_result.execution_time <= self.max_execution_time_seconds + ), f"`{self.run_result.unique_id.split('.')[-1]}` has an execution time ({self.run_result.execution_time} greater than permitted ({self.max_execution_time_seconds}s)." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index fb2daa0f..f970ee16 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -175,6 +175,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": logging.info("Validating conf...") # Rebuild the model to ensure all fields are present + from dbt_bouncer.parsers import DbtBouncerRunResult import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 From 57a6b3d2fc1546d6bb0f97c8d75ba2c3625e447c Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Sun, 1 Sep 2024 15:12:01 +0200 Subject: [PATCH 02/17] Saving approach --- dbt-bouncer-example.yml | 4 +- .../docstring/other_parameters.html.jinja | 2 + .../material/docstring/receives.html.jinja | 31 ++++ mkdocs.yml | 5 +- .../checks/manifest/check_models.py | 103 +++++------ .../checks/run_results/check_run_results.py | 170 ++++++++++-------- src/dbt_bouncer/config_file_validator.py | 23 ++- src/dbt_bouncer/runner.py | 67 +++++-- .../checks/run_results/test_run_results.py | 66 ++++--- 9 files changed, 304 insertions(+), 167 deletions(-) create mode 100644 docs/templates/python/material/docstring/receives.html.jinja diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index f3c81c04..962ffad7 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -34,7 +34,7 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # - name: check_source_columns_are_all_documented # exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only -# manifest_checks: +manifest_checks: # # Exposures # - name: check_exposure_based_on_non_public_models # - name: check_exposure_based_on_view @@ -109,7 +109,7 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # include: ^models/staging/crm # tags: # - crm -# - name: check_model_has_unique_test + - name: check_model_has_unique_test # - name: check_model_max_chained_views # - name: check_model_max_fanout # max_downstream_models: 2 diff --git a/docs/templates/python/material/docstring/other_parameters.html.jinja b/docs/templates/python/material/docstring/other_parameters.html.jinja index eebf79bb..7c6fa15a 100644 --- a/docs/templates/python/material/docstring/other_parameters.html.jinja +++ b/docs/templates/python/material/docstring/other_parameters.html.jinja @@ -1,3 +1,5 @@ +{% import "language.html" as lang with context %} +

Other Parameters (passed via config file):

diff --git a/docs/templates/python/material/docstring/receives.html.jinja b/docs/templates/python/material/docstring/receives.html.jinja new file mode 100644 index 00000000..43806581 --- /dev/null +++ b/docs/templates/python/material/docstring/receives.html.jinja @@ -0,0 +1,31 @@ +{% import "language.html" as lang with context %} + +

Receives at execution time:

+
+ + + {% if name_column %}{% endif %} + + + + + + {% for receives in section.value %} + + {% if name_column %}{% endif %} + + + + {% endfor %} + +
{{ lang.t("Name") }}{{ lang.t("Type") }}{{ lang.t("Description") }}
{% if receives.name %}{{ receives.name }}{% endif %} + {% if receives.annotation %} + {% with expression = receives.annotation %} + {% include "expression.html" with context %} + {% endwith %} + {% endif %} + +
+ {{ receives.description|convert_markdown(heading_level, html_id) }} +
+
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index f2e62c92..0fb9abdb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -62,13 +62,14 @@ plugins: - src/dbt_bouncer/checks options: docstring_style: google - filters: ["check_"] + # filters: ["check_"] + filters: ["Check"] group_by_category: true heading_level: 1 inherited_members: true members_order: source show_bases: false - show_docstring_classes: false + show_docstring_classes: true show_docstring_raises: false show_if_no_docstring: true show_root_toc_entry: false diff --git a/src/dbt_bouncer/checks/manifest/check_models.py b/src/dbt_bouncer/checks/manifest/check_models.py index dc7a25b0..f35e63ad 100644 --- a/src/dbt_bouncer/checks/manifest/check_models.py +++ b/src/dbt_bouncer/checks/manifest/check_models.py @@ -456,58 +456,61 @@ class CheckModelHasUniqueTest(BaseCheck): "unique", ], ) + model: "DbtBouncerModel" = Field(default=None) name: Literal["check_model_has_unique_test"] + tests: List["DbtBouncerTest"] = Field(default=None) + + + def execute( + # model: "DbtBouncerModel", + # tests: "DbtBouncerTest", + # accepted_uniqueness_tests: List[str] = ( + # [ # noqa: B006 + # "expect_compound_columns_to_be_unique", + # "dbt_utils.unique_combination_of_columns", + # "unique", + # ] + # ), + # **kwargs, + self + ) -> None: + """Models must have a test for uniqueness of a column. + + Parameters: + accepted_uniqueness_tests (Optional[List[str]]): List of tests that are accepted as uniqueness tests. + model (DbtBouncerModel): The DbtBouncerModel object to check. + tests (List[DbtBouncerTest]): List of DbtBouncerTest objects parsed from `manifest.json`. + + Other Parameters: + exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. + include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. + severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. + + Example(s): + ```yaml + manifest_checks: + - name: check_model_has_unique_test + include: ^models/marts + ``` + ```yaml + manifest_checks: + # Example of allowing a custom uniqueness test + - name: check_model_has_unique_test + accepted_uniqueness_tests: + - expect_compound_columns_to_be_unique + - my_custom_uniqueness_test + - unique + ``` - -def check_model_has_unique_test( - model: "DbtBouncerModel", - tests: "DbtBouncerTest", - accepted_uniqueness_tests: List[str] = ( - [ # noqa: B006 - "expect_compound_columns_to_be_unique", - "dbt_utils.unique_combination_of_columns", - "unique", - ] - ), - **kwargs, -) -> None: - """Models must have a test for uniqueness of a column. - - Parameters: - accepted_uniqueness_tests (Optional[List[str]]): List of tests that are accepted as uniqueness tests. - model (DbtBouncerModel): The DbtBouncerModel object to check. - tests (List[DbtBouncerTest]): List of DbtBouncerTest objects parsed from `manifest.json`. - - Other Parameters: - exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. - include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. - severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - - Example(s): - ```yaml - manifest_checks: - - name: check_model_has_unique_test - include: ^models/marts - ``` - ```yaml - manifest_checks: - # Example of allowing a custom uniqueness test - - name: check_model_has_unique_test - accepted_uniqueness_tests: - - expect_compound_columns_to_be_unique - - my_custom_uniqueness_test - - unique - ``` - - """ - num_unique_tests = sum( - test.attached_node == model.unique_id - and test.test_metadata.name in accepted_uniqueness_tests - for test in tests - ) - assert ( - num_unique_tests >= 1 - ), f"`{model.name}` does not have a test for uniqueness of a column." + """ + num_unique_tests = sum( + test.attached_node == self.model.unique_id + and test.test_metadata.name in self.accepted_uniqueness_tests + for test in self.tests + ) + assert ( + num_unique_tests >= 1 + ), f"`{self.model.name}` does not have a test for uniqueness of a column." class CheckModelHasUnitTests(BaseCheck): diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index b552fdbb..07133abd 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -1,6 +1,6 @@ # mypy: disable-error-code="union-attr" -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Literal, Union from dbt_bouncer.check_base import BaseCheck @@ -8,91 +8,109 @@ from dbt_bouncer.parsers import DbtBouncerRunResult # from dbt_artifacts_parser.parsers.run_results.run_results_v6 import Result - -# class CheckRunResultsMaxGigabytesBilled(BaseCheck): -# max_gigabytes_billed: float -# name: Literal["check_run_results_max_gigabytes_billed"] - - -# def check_run_results_max_gigabytes_billed( -# max_gigabytes_billed: float, -# run_result: "DbtBouncerRunResult", -# **kwargs, -# ) -> None: -# """Each result can have a maximum number of gigabytes billed. - -# !!! note - -# Note that this check only works for the `dbt-bigquery` adapter. - -# Parameters: -# max_gigabytes_billed (float): The maximum number of gigabytes billed. -# run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. - -# Other Parameters: -# exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. -# include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. -# severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - -# Raises: # noqa:DOC502 -# KeyError: If the `dbt-bigquery` adapter is not used. - -# Example(s): -# ```yaml -# run_results_checks: -# - name: check_run_results_max_gigabytes_billed -# max_gigabytes_billed: 100 -# ``` - -# """ -# try: -# gigabytes_billed = run_result.adapter_response["bytes_billed"] / (1000**3) -# except KeyError as e: -# raise RuntimeError( # noqa: DOC501 -# "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", -# ) from e - -# assert ( -# gigabytes_billed < max_gigabytes_billed -# ), f"`{run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({max_gigabytes_billed})." +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + from dbt_artifacts_parser.parsers.run_results.run_results_v4 import ( + RunResultOutput as RunResultOutput_v4, + ) + from dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 + from dbt_artifacts_parser.parsers.run_results.run_results_v5 import ( + RunResultOutput as RunResultOutput_v5, + ) + from dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5 + from dbt_artifacts_parser.parsers.run_results.run_results_v6 import ( + Result, + RunResultsV6, + ) + +class CheckRunResultsMaxGigabytesBilled(BaseCheck): + max_gigabytes_billed: float + name: Literal["check_run_results_max_gigabytes_billed"] + + +def check_run_results_max_gigabytes_billed( + max_gigabytes_billed: float, + run_result: "DbtBouncerRunResult", + **kwargs, +) -> None: + """Each result can have a maximum number of gigabytes billed. + + !!! note + + Note that this check only works for the `dbt-bigquery` adapter. + + Parameters: + max_gigabytes_billed (float): The maximum number of gigabytes billed. + run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. + + Other Parameters: + exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. + include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. + severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. + + Raises: # noqa:DOC502 + KeyError: If the `dbt-bigquery` adapter is not used. + + Example(s): + ```yaml + run_results_checks: + - name: check_run_results_max_gigabytes_billed + max_gigabytes_billed: 100 + ``` + + """ + try: + gigabytes_billed = run_result.adapter_response["bytes_billed"] / (1000**3) + except KeyError as e: + raise RuntimeError( # noqa: DOC501 + "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", + ) from e + + assert ( + gigabytes_billed < max_gigabytes_billed + ), f"`{run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({max_gigabytes_billed})." import logging +from typing import Optional +from pydantic import Field class CheckRunResultsMaxExecutionTime(BaseCheck): + """Each result can take a maximum duration (seconds). + + Parameters: + max_execution_time_seconds (float): The maximum execution time (seconds) allowed for a node. + + Receives: + run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. + + Other Parameters: + exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. + include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. + severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. + + Example(s): + ```yaml + run_results_checks: + - name: check_run_results_max_execution_time + max_execution_time_seconds: 60 + ``` + ```yaml + run_results_checks: + - name: check_run_results_max_execution_time + include: ^models/staging # Not a good idea, here for demonstration purposes only + max_execution_time_seconds: 10 + ``` + + """ + max_execution_time_seconds: float name: Literal["check_run_results_max_execution_time"] - run_result: "DbtBouncerRunResult" + run_result: Optional[Union["RunResultOutput_v4", "RunResultOutput_v5", "Result"]] = Field(default=None) def execute(self) -> None: - """Each result can take a maximum duration (seconds). - - Parameters: - max_execution_time_seconds (float): The maximum execution time (seconds) allowed for a node. - run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. - - Other Parameters: - exclude (Optional[str]): Regex pattern to match the resource path. Resource paths that match the pattern will not be checked. - include (Optional[str]): Regex pattern to match the resource path. Only resource paths that match the pattern will be checked. - severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - - Example(s): - ```yaml - run_results_checks: - - name: check_run_results_max_execution_time - max_execution_time_seconds: 60 - ``` - ```yaml - run_results_checks: - - name: check_run_results_max_execution_time - include: ^models/staging # Not a good idea, here for demonstration purposes only - max_execution_time_seconds: 10 - ``` - - """ - - logging.error("Executing check...") assert ( self.run_result.execution_time <= self.max_execution_time_seconds diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index f970ee16..c07034c6 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -18,6 +18,8 @@ class DbtBouncerConf(BaseModel): {"class": getattr(x, x.__name__), "source_file": x.__file__} for x in get_check_objects()["classes"] ] + + # logging.warning(f"{check_classes=}") # Catalog checks catalog_check_classes: ClassVar = [ @@ -25,6 +27,10 @@ class DbtBouncerConf(BaseModel): for x in check_classes if x["source_file"].split("/")[-2] == "catalog" ] + + # logging.warning(f"{catalog_check_classes=}") + + CatalogCheckConfigs: ClassVar = Annotated[ Union[tuple(catalog_check_classes)], Field(discriminator="name"), @@ -47,6 +53,11 @@ class DbtBouncerConf(BaseModel): for x in check_classes if x["source_file"].split("/")[-2] == "run_results" ] + + + # logging.warning(f"{run_results_check_classes=}") + + RunResultsCheckConfigs: ClassVar = Annotated[ Union[tuple(run_results_check_classes)], Field(discriminator="name"), @@ -58,6 +69,10 @@ class DbtBouncerConf(BaseModel): Field(discriminator="name"), ] ] = Field(default=[]) + + # logging.warning(f"{catalog_checks=}") + + manifest_checks: List[ Annotated[ ManifestCheckConfigs, @@ -70,6 +85,9 @@ class DbtBouncerConf(BaseModel): Field(discriminator="name"), ] ] = Field(default=[]) + + + # logging.warning(f"{run_results_checks=}") dbt_artifacts_dir: Optional[str] = Field(default="./target") exclude: Optional[str] = Field( @@ -175,10 +193,13 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": logging.info("Validating conf...") # Rebuild the model to ensure all fields are present - from dbt_bouncer.parsers import DbtBouncerRunResult + from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerTest, DbtBouncerModel import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 DbtBouncerConf.model_rebuild() + + + # logging.warning(f"{config_file_contents=}") return DbtBouncerConf(**config_file_contents) diff --git a/src/dbt_bouncer/runner.py b/src/dbt_bouncer/runner.py index ceadc0f5..11c74a94 100644 --- a/src/dbt_bouncer/runner.py +++ b/src/dbt_bouncer/runner.py @@ -6,9 +6,9 @@ import json import logging import operator -import traceback +import traceback, re from pathlib import Path -from typing import TYPE_CHECKING, Any, List, Union +from typing import TYPE_CHECKING, Any, List, Union, Dict from progress.bar import Bar from tabulate import tabulate @@ -64,8 +64,21 @@ def runner( RuntimeError: If more than one "iterate_over" argument is found. """ - for i in get_check_objects()["functions"]: - locals()[i.__name__] = getattr(i, i.__name__) + # for i in get_check_objects()["functions"]: + # locals()[i.__name__] = getattr(i, i.__name__) + + check_classes: List[Dict[str, Union[Any, str]]] = [ + {"class": getattr(x, x.__name__), "source_file": x.__file__} + for x in get_check_objects()["classes"] + ] + # logging.warning(f"{check_classes=}") + + # logging.warning(check_classes[0]["class"]) + # logging.warning(check_classes[0]["class"].__name__) + for c in check_classes: + locals()[c["class"].__name__] = c["class"] + + # logging.warning(f"{sorted(locals().keys())=}") parsed_data = { "catalog_nodes": catalog_nodes, @@ -98,25 +111,49 @@ def runner( "source", "unit_test", } + + # logging.warning(f"{check=}") + # check_name_camel_case = ''.join(word.title() for word in check.name.split('_')) + # logging.warning(f"{check_name_camel_case=}") + + + + # logging.warning(f"{check.__annotations__.keys()=}") + iterate_over_value = valid_iterate_over_values.intersection( - set(locals()[check.name].__annotations__.keys()), + set(check.__annotations__.keys()), ) + # logging.warning(f"{iterate_over_value=}") if len(iterate_over_value) == 1: iterate_value = next(iter(iterate_over_value)) + # logging.warning(f"{iterate_value=}") + for i in locals()[f"{iterate_value}s"]: if resource_in_path(check, i): - check_run_id = ( - f"{check.name}:{check.index}:{i.unique_id.split('.')[2]}" - ) + check_run_id = f"{check.name}:{check.index}:{i.unique_id.split('.')[2]}" + setattr(check, iterate_value, getattr(i, iterate_value, i)) + + + # logging.warning(f"{parsed_data.keys()=}") + # logging.warning(f"{check.__annotations__.keys()=}") + # logging.warning( + # f"{parsed_data.keys() & check.__annotations__.keys()}" + # ) + + for x in parsed_data.keys() & check.__annotations__.keys(): + # logging.warning(f"{x=}") + setattr(check, x, parsed_data[x]) + checks_to_run.append( { **{ + "check": check, "check_run_id": check_run_id, }, **check.model_dump(), - **{iterate_value: getattr(i, iterate_value, i)}, + # **{iterate_value: getattr(i, iterate_value, i)}, }, ) elif len(iterate_over_value) > 1: @@ -128,6 +165,7 @@ def runner( checks_to_run.append( { **{ + "check": check, "check_run_id": check_run_id, }, **check.model_dump(), @@ -140,7 +178,16 @@ def runner( for check in checks_to_run: logging.debug(f"Running {check['check_run_id']}...") try: - locals()[check["name"]](**{**check, **parsed_data}) + # logging.warning(f"{check=}") + # logging.warning(f"{check['check']=}") + # logging.warning(f"{parsed_data.keys()=}") + # check["check"].run_result = check["run_result"] + check["check"].execute() + # **{ + # **check, + # **parsed_data + # } + # ) check["outcome"] = "success" except AssertionError as e: failure_message = list( diff --git a/tests/unit/checks/run_results/test_run_results.py b/tests/unit/checks/run_results/test_run_results.py index 3a2172df..468dddcf 100644 --- a/tests/unit/checks/run_results/test_run_results.py +++ b/tests/unit/checks/run_results/test_run_results.py @@ -8,10 +8,11 @@ from dbt_artifacts_parser.parsers.run_results.run_results_v6 import Result from dbt_bouncer.checks.run_results.check_run_results import ( - check_run_results_max_execution_time, + CheckRunResultsMaxExecutionTime, check_run_results_max_gigabytes_billed, ) - +from dbt_bouncer.parsers import DbtBouncerRunResult +CheckRunResultsMaxExecutionTime.model_rebuild() @pytest.mark.parametrize( ("max_gigabytes_billed", "run_result", "expectation"), @@ -63,43 +64,55 @@ def test_check_run_results_max_gigabytes_billed( [ ( 10, - Result( + DbtBouncerRunResult( **{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 1, - "status": "success", - "thread_id": "Thread-1", - "timing": [], + "original_file_path": "path/to/file.sql", + "run_result": Result(**{ + "adapter_response": {"bytes_billed": 1}, + "execution_time": 1, + "status": "success", + "thread_id": "Thread-1", + "timing": [], + "unique_id": "model.package_name.model_1", + }), "unique_id": "model.package_name.model_1", - }, + } ), does_not_raise(), ), ( 10, - Result( + DbtBouncerRunResult( **{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 10, - "status": "success", - "thread_id": "Thread-1", - "timing": [], + "original_file_path": "path/to/file.sql", + "run_result": Result(**{ + "adapter_response": {"bytes_billed": 1}, + "execution_time": 10, + "status": "success", + "thread_id": "Thread-1", + "timing": [], + "unique_id": "model.package_name.model_1", + }), "unique_id": "model.package_name.model_1", - }, + } ), does_not_raise(), ), ( 10, - Result( + DbtBouncerRunResult( **{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 100, - "status": "success", - "thread_id": "Thread-1", - "timing": [], + "original_file_path": "path/to/file.sql", + "run_result": Result(**{ + "adapter_response": {"bytes_billed": 1}, + "execution_time": 100, + "status": "success", + "thread_id": "Thread-1", + "timing": [], + "unique_id": "model.package_name.model_1", + }), "unique_id": "model.package_name.model_1", - }, + } ), pytest.raises(AssertionError), ), @@ -111,7 +124,8 @@ def test_check_run_results_max_execution_time( expectation, ): with expectation: - check_run_results_max_execution_time( + CheckRunResultsMaxExecutionTime( max_execution_time_seconds=max_execution_time_seconds, - run_result=run_result, - ) + name="check_run_results_max_execution_time", + run_result=run_result.run_result, + ).execute() From 767a9f328d684a590bbc2ee7b6a78a880cd9a53b Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 11:17:38 +0200 Subject: [PATCH 03/17] Moving run_results to new format --- .../checks/run_results/check_run_results.py | 48 ++++++------ src/dbt_bouncer/config_file_validator.py | 14 +--- src/dbt_bouncer/parsers.py | 5 +- .../checks/run_results/test_run_results.py | 77 +++++++++---------- 4 files changed, 63 insertions(+), 81 deletions(-) diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index 07133abd..dbbf606e 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -4,9 +4,12 @@ from dbt_bouncer.check_base import BaseCheck + +import logging +from typing import Optional +from pydantic import Field if TYPE_CHECKING: - from dbt_bouncer.parsers import DbtBouncerRunResult - # from dbt_artifacts_parser.parsers.run_results.run_results_v6 import Result + from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerRunResultBase import warnings with warnings.catch_warnings(): @@ -25,15 +28,6 @@ ) class CheckRunResultsMaxGigabytesBilled(BaseCheck): - max_gigabytes_billed: float - name: Literal["check_run_results_max_gigabytes_billed"] - - -def check_run_results_max_gigabytes_billed( - max_gigabytes_billed: float, - run_result: "DbtBouncerRunResult", - **kwargs, -) -> None: """Each result can have a maximum number of gigabytes billed. !!! note @@ -56,25 +50,29 @@ def check_run_results_max_gigabytes_billed( ```yaml run_results_checks: - name: check_run_results_max_gigabytes_billed - max_gigabytes_billed: 100 + max_gigabytes_billed: 100 ``` """ - try: - gigabytes_billed = run_result.adapter_response["bytes_billed"] / (1000**3) - except KeyError as e: - raise RuntimeError( # noqa: DOC501 - "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", - ) from e + + max_gigabytes_billed: float + name: Literal["check_run_results_max_gigabytes_billed"] + run_result: Optional["DbtBouncerRunResultBase"] = Field(default=None) - assert ( - gigabytes_billed < max_gigabytes_billed - ), f"`{run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({max_gigabytes_billed})." + def execute(self + ) -> None: + try: + gigabytes_billed = self.run_result.adapter_response["bytes_billed"] / (1000**3) + except KeyError as e: + raise RuntimeError( # noqa: DOC501 + "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", + ) from e + + assert ( + gigabytes_billed < self.max_gigabytes_billed + ), f"`{self.run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({self.max_gigabytes_billed})." -import logging -from typing import Optional -from pydantic import Field class CheckRunResultsMaxExecutionTime(BaseCheck): @@ -108,7 +106,7 @@ class CheckRunResultsMaxExecutionTime(BaseCheck): max_execution_time_seconds: float name: Literal["check_run_results_max_execution_time"] - run_result: Optional[Union["RunResultOutput_v4", "RunResultOutput_v5", "Result"]] = Field(default=None) + run_result: Optional["DbtBouncerRunResultBase"] = Field(default=None) def execute(self) -> None: diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index 33c144ca..4bf76192 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -7,7 +7,7 @@ from typing_extensions import Annotated from dbt_bouncer.utils import get_check_objects, load_config_from_yaml - +from dbt_bouncer.parsers import DbtBouncerRunResultBase class DbtBouncerConf(BaseModel): """Base model for the config file contents.""" @@ -18,8 +18,6 @@ class DbtBouncerConf(BaseModel): {"class": getattr(x, x.__name__), "source_file": x.__file__} for x in get_check_objects()["classes"] ] - - # logging.warning(f"{check_classes=}") # Catalog checks catalog_check_classes: ClassVar = [ @@ -28,8 +26,6 @@ class DbtBouncerConf(BaseModel): if x["source_file"].split("/")[-2] == "catalog" ] - # logging.warning(f"{catalog_check_classes=}") - CatalogCheckConfigs: ClassVar = Annotated[ Union[tuple(catalog_check_classes)], @@ -55,9 +51,6 @@ class DbtBouncerConf(BaseModel): ] - # logging.warning(f"{run_results_check_classes=}") - - RunResultsCheckConfigs: ClassVar = Annotated[ Union[tuple(run_results_check_classes)], Field(discriminator="name"), @@ -70,8 +63,6 @@ class DbtBouncerConf(BaseModel): ] ] = Field(default=[]) - # logging.warning(f"{catalog_checks=}") - manifest_checks: List[ Annotated[ @@ -87,7 +78,6 @@ class DbtBouncerConf(BaseModel): ] = Field(default=[]) - # logging.warning(f"{run_results_checks=}") dbt_artifacts_dir: Optional[str] = Field(default="./target") exclude: Optional[str] = Field( @@ -199,7 +189,5 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerConf.model_rebuild() - - # logging.warning(f"{config_file_contents=}") return DbtBouncerConf(**config_file_contents) diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index 9155ff96..5001ee74 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -89,11 +89,14 @@ class DbtBouncerModel(BaseModel): unique_id: str +DbtBouncerRunResultBase = Union[RunResultOutput_v4, RunResultOutput_v5, Result] + + class DbtBouncerRunResult(BaseModel): """Model for all results in `run_results.json`.""" original_file_path: str - run_result: Union[RunResultOutput_v4, RunResultOutput_v5, Result] + run_result: DbtBouncerRunResultBase unique_id: str diff --git a/tests/unit/checks/run_results/test_run_results.py b/tests/unit/checks/run_results/test_run_results.py index 468dddcf..9eceba28 100644 --- a/tests/unit/checks/run_results/test_run_results.py +++ b/tests/unit/checks/run_results/test_run_results.py @@ -9,18 +9,22 @@ from dbt_bouncer.checks.run_results.check_run_results import ( CheckRunResultsMaxExecutionTime, - check_run_results_max_gigabytes_billed, + CheckRunResultsMaxGigabytesBilled, ) -from dbt_bouncer.parsers import DbtBouncerRunResult +from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerRunResultBase +from pydantic import TypeAdapter + +CheckRunResultsMaxGigabytesBilled.model_rebuild() CheckRunResultsMaxExecutionTime.model_rebuild() + @pytest.mark.parametrize( ("max_gigabytes_billed", "run_result", "expectation"), [ ( 10, - Result( - **{ + TypeAdapter(DbtBouncerRunResultBase).validate_python( + { "adapter_response": {"bytes_billed": 1}, "execution_time": 1, "status": "success", @@ -33,8 +37,8 @@ ), ( 10, - Result( - **{ + TypeAdapter(DbtBouncerRunResultBase).validate_python( + { "adapter_response": {"bytes_billed": 100000000000}, "execution_time": 1, "status": "success", @@ -53,10 +57,11 @@ def test_check_run_results_max_gigabytes_billed( expectation, ): with expectation: - check_run_results_max_gigabytes_billed( + CheckRunResultsMaxGigabytesBilled( max_gigabytes_billed=max_gigabytes_billed, + name="check_run_results_max_gigabytes_billed", run_result=run_result, - ) + ).execute() @pytest.mark.parametrize( @@ -64,17 +69,13 @@ def test_check_run_results_max_gigabytes_billed( [ ( 10, - DbtBouncerRunResult( - **{ - "original_file_path": "path/to/file.sql", - "run_result": Result(**{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 1, - "status": "success", - "thread_id": "Thread-1", - "timing": [], - "unique_id": "model.package_name.model_1", - }), + TypeAdapter(DbtBouncerRunResultBase).validate_python( + { + "adapter_response": {"bytes_billed": 1}, + "execution_time": 1, + "status": "success", + "thread_id": "Thread-1", + "timing": [], "unique_id": "model.package_name.model_1", } ), @@ -82,17 +83,13 @@ def test_check_run_results_max_gigabytes_billed( ), ( 10, - DbtBouncerRunResult( - **{ - "original_file_path": "path/to/file.sql", - "run_result": Result(**{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 10, - "status": "success", - "thread_id": "Thread-1", - "timing": [], - "unique_id": "model.package_name.model_1", - }), + TypeAdapter(DbtBouncerRunResultBase).validate_python( + { + "adapter_response": {"bytes_billed": 1}, + "execution_time": 10, + "status": "success", + "thread_id": "Thread-1", + "timing": [], "unique_id": "model.package_name.model_1", } ), @@ -100,17 +97,13 @@ def test_check_run_results_max_gigabytes_billed( ), ( 10, - DbtBouncerRunResult( - **{ - "original_file_path": "path/to/file.sql", - "run_result": Result(**{ - "adapter_response": {"bytes_billed": 1}, - "execution_time": 100, - "status": "success", - "thread_id": "Thread-1", - "timing": [], - "unique_id": "model.package_name.model_1", - }), + TypeAdapter(DbtBouncerRunResultBase).validate_python( + { + "adapter_response": {"bytes_billed": 1}, + "execution_time": 100, + "status": "success", + "thread_id": "Thread-1", + "timing": [], "unique_id": "model.package_name.model_1", } ), @@ -127,5 +120,5 @@ def test_check_run_results_max_execution_time( CheckRunResultsMaxExecutionTime( max_execution_time_seconds=max_execution_time_seconds, name="check_run_results_max_execution_time", - run_result=run_result.run_result, + run_result=run_result, ).execute() From 0e3a063b0c7e0f76b1b035400c30c13643f6e14f Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 11:23:26 +0200 Subject: [PATCH 04/17] Running pre-commit --- .../material/docstring/receives.html.jinja | 2 +- .../checks/manifest/check_models.py | 9 ++-- .../checks/run_results/check_run_results.py | 42 ++++++---------- src/dbt_bouncer/config_file_validator.py | 21 ++++---- src/dbt_bouncer/runner.py | 50 +++---------------- .../checks/run_results/test_run_results.py | 6 +-- 6 files changed, 39 insertions(+), 91 deletions(-) diff --git a/docs/templates/python/material/docstring/receives.html.jinja b/docs/templates/python/material/docstring/receives.html.jinja index 43806581..a3bb29ca 100644 --- a/docs/templates/python/material/docstring/receives.html.jinja +++ b/docs/templates/python/material/docstring/receives.html.jinja @@ -28,4 +28,4 @@ {% endfor %} - \ No newline at end of file + diff --git a/src/dbt_bouncer/checks/manifest/check_models.py b/src/dbt_bouncer/checks/manifest/check_models.py index f35e63ad..17381553 100644 --- a/src/dbt_bouncer/checks/manifest/check_models.py +++ b/src/dbt_bouncer/checks/manifest/check_models.py @@ -458,21 +458,20 @@ class CheckModelHasUniqueTest(BaseCheck): ) model: "DbtBouncerModel" = Field(default=None) name: Literal["check_model_has_unique_test"] - tests: List["DbtBouncerTest"] = Field(default=None) - + tests: List["DbtBouncerTest"] = Field(default=[]) def execute( # model: "DbtBouncerModel", # tests: "DbtBouncerTest", # accepted_uniqueness_tests: List[str] = ( - # [ # noqa: B006 + # [ # "expect_compound_columns_to_be_unique", # "dbt_utils.unique_combination_of_columns", # "unique", # ] # ), # **kwargs, - self + self, ) -> None: """Models must have a test for uniqueness of a column. @@ -505,7 +504,7 @@ def execute( """ num_unique_tests = sum( test.attached_node == self.model.unique_id - and test.test_metadata.name in self.accepted_uniqueness_tests + and test.test_metadata.name in self.accepted_uniqueness_tests # type: ignore[operator] for test in self.tests ) assert ( diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index dbbf606e..e66881e9 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -1,31 +1,19 @@ # mypy: disable-error-code="union-attr" -from typing import TYPE_CHECKING, Literal, Union +from typing import TYPE_CHECKING, Literal, Optional -from dbt_bouncer.check_base import BaseCheck +from pydantic import Field +from dbt_bouncer.check_base import BaseCheck -import logging -from typing import Optional -from pydantic import Field if TYPE_CHECKING: - from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerRunResultBase + from dbt_bouncer.parsers import DbtBouncerRunResultBase import warnings + with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - from dbt_artifacts_parser.parsers.run_results.run_results_v4 import ( - RunResultOutput as RunResultOutput_v4, - ) - from dbt_artifacts_parser.parsers.run_results.run_results_v4 import RunResultsV4 - from dbt_artifacts_parser.parsers.run_results.run_results_v5 import ( - RunResultOutput as RunResultOutput_v5, - ) - from dbt_artifacts_parser.parsers.run_results.run_results_v5 import RunResultsV5 - from dbt_artifacts_parser.parsers.run_results.run_results_v6 import ( - Result, - RunResultsV6, - ) + class CheckRunResultsMaxGigabytesBilled(BaseCheck): """Each result can have a maximum number of gigabytes billed. @@ -54,16 +42,17 @@ class CheckRunResultsMaxGigabytesBilled(BaseCheck): ``` """ - + max_gigabytes_billed: float name: Literal["check_run_results_max_gigabytes_billed"] run_result: Optional["DbtBouncerRunResultBase"] = Field(default=None) - - def execute(self - ) -> None: + def execute(self) -> None: + """Execute the check.""" try: - gigabytes_billed = self.run_result.adapter_response["bytes_billed"] / (1000**3) + gigabytes_billed = self.run_result.adapter_response["bytes_billed"] / ( + 1000**3 + ) except KeyError as e: raise RuntimeError( # noqa: DOC501 "`bytes_billed` not found in adapter response. Are you using the `dbt-bigquery` adapter?", @@ -74,13 +63,12 @@ def execute(self ), f"`{self.run_result.unique_id.split('.')[-2]}` results in ({gigabytes_billed} billed bytes, this is greater than permitted ({self.max_gigabytes_billed})." - class CheckRunResultsMaxExecutionTime(BaseCheck): """Each result can take a maximum duration (seconds). Parameters: max_execution_time_seconds (float): The maximum execution time (seconds) allowed for a node. - + Receives: run_result (DbtBouncerRunResult): The DbtBouncerRunResult object to check. @@ -103,13 +91,13 @@ class CheckRunResultsMaxExecutionTime(BaseCheck): ``` """ - + max_execution_time_seconds: float name: Literal["check_run_results_max_execution_time"] run_result: Optional["DbtBouncerRunResultBase"] = Field(default=None) def execute(self) -> None: - + """Execute the check.""" assert ( self.run_result.execution_time <= self.max_execution_time_seconds ), f"`{self.run_result.unique_id.split('.')[-1]}` has an execution time ({self.run_result.execution_time} greater than permitted ({self.max_execution_time_seconds}s)." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index 4bf76192..f8907a13 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -7,7 +7,7 @@ from typing_extensions import Annotated from dbt_bouncer.utils import get_check_objects, load_config_from_yaml -from dbt_bouncer.parsers import DbtBouncerRunResultBase + class DbtBouncerConf(BaseModel): """Base model for the config file contents.""" @@ -25,8 +25,7 @@ class DbtBouncerConf(BaseModel): for x in check_classes if x["source_file"].split("/")[-2] == "catalog" ] - - + CatalogCheckConfigs: ClassVar = Annotated[ Union[tuple(catalog_check_classes)], Field(discriminator="name"), @@ -49,8 +48,7 @@ class DbtBouncerConf(BaseModel): for x in check_classes if x["source_file"].split("/")[-2] == "run_results" ] - - + RunResultsCheckConfigs: ClassVar = Annotated[ Union[tuple(run_results_check_classes)], Field(discriminator="name"), @@ -62,8 +60,7 @@ class DbtBouncerConf(BaseModel): Field(discriminator="name"), ] ] = Field(default=[]) - - + manifest_checks: List[ Annotated[ ManifestCheckConfigs, @@ -76,8 +73,6 @@ class DbtBouncerConf(BaseModel): Field(discriminator="name"), ] ] = Field(default=[]) - - dbt_artifacts_dir: Optional[str] = Field(default="./target") exclude: Optional[str] = Field( @@ -183,11 +178,15 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": logging.info("Validating conf...") # Rebuild the model to ensure all fields are present - from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerTest, DbtBouncerModel import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 + from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerModel, + DbtBouncerRunResult, + DbtBouncerRunResultBase, + DbtBouncerTest, + ) DbtBouncerConf.model_rebuild() - return DbtBouncerConf(**config_file_contents) diff --git a/src/dbt_bouncer/runner.py b/src/dbt_bouncer/runner.py index 11c74a94..52f904d7 100644 --- a/src/dbt_bouncer/runner.py +++ b/src/dbt_bouncer/runner.py @@ -6,9 +6,9 @@ import json import logging import operator -import traceback, re +import traceback from pathlib import Path -from typing import TYPE_CHECKING, Any, List, Union, Dict +from typing import TYPE_CHECKING, Any, Dict, List, Union from progress.bar import Bar from tabulate import tabulate @@ -64,21 +64,12 @@ def runner( RuntimeError: If more than one "iterate_over" argument is found. """ - # for i in get_check_objects()["functions"]: - # locals()[i.__name__] = getattr(i, i.__name__) - check_classes: List[Dict[str, Union[Any, str]]] = [ {"class": getattr(x, x.__name__), "source_file": x.__file__} for x in get_check_objects()["classes"] ] - # logging.warning(f"{check_classes=}") - - # logging.warning(check_classes[0]["class"]) - # logging.warning(check_classes[0]["class"].__name__) for c in check_classes: - locals()[c["class"].__name__] = c["class"] - - # logging.warning(f"{sorted(locals().keys())=}") + locals()[c["class"].__name__] = c["class"] # type: ignore[union-attr] parsed_data = { "catalog_nodes": catalog_nodes, @@ -111,39 +102,20 @@ def runner( "source", "unit_test", } - - # logging.warning(f"{check=}") - # check_name_camel_case = ''.join(word.title() for word in check.name.split('_')) - # logging.warning(f"{check_name_camel_case=}") - - - - # logging.warning(f"{check.__annotations__.keys()=}") - iterate_over_value = valid_iterate_over_values.intersection( set(check.__annotations__.keys()), ) - # logging.warning(f"{iterate_over_value=}") if len(iterate_over_value) == 1: iterate_value = next(iter(iterate_over_value)) - - # logging.warning(f"{iterate_value=}") - for i in locals()[f"{iterate_value}s"]: if resource_in_path(check, i): - check_run_id = f"{check.name}:{check.index}:{i.unique_id.split('.')[2]}" + check_run_id = ( + f"{check.name}:{check.index}:{i.unique_id.split('.')[2]}" + ) setattr(check, iterate_value, getattr(i, iterate_value, i)) - - # logging.warning(f"{parsed_data.keys()=}") - # logging.warning(f"{check.__annotations__.keys()=}") - # logging.warning( - # f"{parsed_data.keys() & check.__annotations__.keys()}" - # ) - for x in parsed_data.keys() & check.__annotations__.keys(): - # logging.warning(f"{x=}") setattr(check, x, parsed_data[x]) checks_to_run.append( @@ -153,7 +125,6 @@ def runner( "check_run_id": check_run_id, }, **check.model_dump(), - # **{iterate_value: getattr(i, iterate_value, i)}, }, ) elif len(iterate_over_value) > 1: @@ -178,16 +149,7 @@ def runner( for check in checks_to_run: logging.debug(f"Running {check['check_run_id']}...") try: - # logging.warning(f"{check=}") - # logging.warning(f"{check['check']=}") - # logging.warning(f"{parsed_data.keys()=}") - # check["check"].run_result = check["run_result"] check["check"].execute() - # **{ - # **check, - # **parsed_data - # } - # ) check["outcome"] = "success" except AssertionError as e: failure_message = list( diff --git a/tests/unit/checks/run_results/test_run_results.py b/tests/unit/checks/run_results/test_run_results.py index 9eceba28..ea33e577 100644 --- a/tests/unit/checks/run_results/test_run_results.py +++ b/tests/unit/checks/run_results/test_run_results.py @@ -5,14 +5,14 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - from dbt_artifacts_parser.parsers.run_results.run_results_v6 import Result + +from pydantic import TypeAdapter from dbt_bouncer.checks.run_results.check_run_results import ( CheckRunResultsMaxExecutionTime, CheckRunResultsMaxGigabytesBilled, ) -from dbt_bouncer.parsers import DbtBouncerRunResult, DbtBouncerRunResultBase -from pydantic import TypeAdapter +from dbt_bouncer.parsers import DbtBouncerRunResultBase CheckRunResultsMaxGigabytesBilled.model_rebuild() CheckRunResultsMaxExecutionTime.model_rebuild() From e7f88aef3e52272c0010309a9216922e90c5f01c Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 13:26:25 +0200 Subject: [PATCH 05/17] Moving Exposures to new method --- dbt-bouncer-example.yml | 6 +- .../checks/manifest/check_exposures.py | 90 +++++++++---------- .../checks/run_results/check_run_results.py | 2 +- src/dbt_bouncer/config_file_validator.py | 6 ++ src/dbt_bouncer/parsers.py | 5 +- tests/unit/checks/manifest/test_exposures.py | 18 ++-- 6 files changed, 71 insertions(+), 56 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 962ffad7..a9cfddcc 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -35,9 +35,9 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only manifest_checks: -# # Exposures -# - name: check_exposure_based_on_non_public_models -# - name: check_exposure_based_on_view + # Exposures + - name: check_exposure_based_on_non_public_models + - name: check_exposure_based_on_view # # Lineage # - name: check_lineage_permitted_upstream_models diff --git a/src/dbt_bouncer/checks/manifest/check_exposures.py b/src/dbt_bouncer/checks/manifest/check_exposures.py index 13b99094..de2c4efe 100644 --- a/src/dbt_bouncer/checks/manifest/check_exposures.py +++ b/src/dbt_bouncer/checks/manifest/check_exposures.py @@ -10,26 +10,18 @@ if TYPE_CHECKING: import warnings - from dbt_bouncer.parsers import DbtBouncerModel - with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Exposures + from dbt_bouncer.parsers import DbtBouncerModelBase class CheckExposureOnNonPublicModels(BaseCheck): - name: Literal["check_exposure_based_on_non_public_models"] - - -def check_exposure_based_on_non_public_models( - exposure: "Exposures", - models: List["DbtBouncerModel"], - **kwargs, -) -> None: """Exposures should be based on public models only. - Parameters: + Receives: exposure (Exposures): The Exposures object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the exposure path (i.e the .yml file where the exposure is configured). Exposure paths that match the pattern will not be checked. @@ -43,38 +35,36 @@ def check_exposure_based_on_non_public_models( ``` """ - non_public_upstream_dependencies = [] - for model in exposure.depends_on.nodes: - if ( - model.split(".")[0] == "model" - and model.split(".")[1] == exposure.package_name - ): - model = next(m for m in models if m.unique_id == model) - if model.access.value != "public": - non_public_upstream_dependencies.append(model.name) - assert not non_public_upstream_dependencies, f"`{exposure.name}` is based on a model(s) that is not public: {non_public_upstream_dependencies}." + exposure: "Exposures" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_exposure_based_on_non_public_models"] + def execute(self) -> None: + """Execute the check.""" + non_public_upstream_dependencies = [] + for model in self.exposure.depends_on.nodes: + if ( + model.split(".")[0] == "model" + and model.split(".")[1] == self.exposure.package_name + ): + model = next(m for m in self.models if m.unique_id == model) + if model.access.value != "public": + non_public_upstream_dependencies.append(model.name) -class CheckExposureOnView(BaseCheck): - materializations_to_include: List[str] = Field( - default=["ephemeral", "view"], - ) - name: Literal["check_exposure_based_on_view"] + assert not non_public_upstream_dependencies, f"`{self.exposure.name}` is based on a model(s) that is not public: {non_public_upstream_dependencies}." -def check_exposure_based_on_view( - exposure: "Exposures", - models: List["DbtBouncerModel"], - materializations_to_include: List[str] = ["ephemeral", "view"], # noqa: B006 - **kwargs, -) -> None: +class CheckExposureOnView(BaseCheck): """Exposures should not be based on views. Parameters: - exposure (Exposures): The Exposures object to check. materializations_to_include (Optional[List[str]]): List of materializations to include in the check. + Receives: + exposure (Exposures): The Exposures object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + Other Parameters: exclude (Optional[str]): Regex pattern to match the exposure path (i.e the .yml file where the exposure is configured). Exposure paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the exposure path (i.e the .yml file where the exposure is configured). Only exposure paths that match the pattern will be checked. @@ -88,21 +78,31 @@ def check_exposure_based_on_view( ```yaml manifest_checks: - name: check_exposure_based_on_view - materializations_to_include: + materializations_to_include: - ephemeral - my_custom_materialization - view ``` """ - non_table_upstream_dependencies = [] - for model in exposure.depends_on.nodes: - if ( - model.split(".")[0] == "model" - and model.split(".")[1] == exposure.package_name - ): - model = next(m for m in models if m.unique_id == model) - if model.config.materialized in materializations_to_include: - non_table_upstream_dependencies.append(model.name) - - assert not non_table_upstream_dependencies, f"`{exposure.name}` is based on a model that is not a table: {non_table_upstream_dependencies}." + + exposure: "Exposures" = Field(default=None) + materializations_to_include: List[str] = Field( + default=["ephemeral", "view"], + ) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_exposure_based_on_view"] + + def execute(self) -> None: + """Execute the check.""" + non_table_upstream_dependencies = [] + for model in self.exposure.depends_on.nodes: + if ( + model.split(".")[0] == "model" + and model.split(".")[1] == self.exposure.package_name + ): + model = next(m for m in self.models if m.unique_id == model) + if model.config.materialized in self.materializations_to_include: + non_table_upstream_dependencies.append(model.name) + + assert not non_table_upstream_dependencies, f"`{self.exposure.name}` is based on a model that is not a table: {non_table_upstream_dependencies}." diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index e66881e9..5c81eeb7 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -38,7 +38,7 @@ class CheckRunResultsMaxGigabytesBilled(BaseCheck): ```yaml run_results_checks: - name: check_run_results_max_gigabytes_billed - max_gigabytes_billed: 100 + max_gigabytes_billed: 100 ``` """ diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index f8907a13..de8158c7 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -37,6 +37,7 @@ class DbtBouncerConf(BaseModel): for x in check_classes if x["source_file"].split("/")[-2] == "manifest" ] + ManifestCheckConfigs: ClassVar = Annotated[ Union[tuple(manifest_check_classes)], Field(discriminator="name"), @@ -178,10 +179,15 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": logging.info("Validating conf...") # Rebuild the model to ensure all fields are present + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( + Exposures, # noqa: F401 + ) + import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.parsers import ( # noqa: F401 DbtBouncerModel, + DbtBouncerModelBase, DbtBouncerRunResult, DbtBouncerRunResultBase, DbtBouncerTest, diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index 5001ee74..64c0f232 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -81,10 +81,13 @@ class DbtBouncerManifest(BaseModel): manifest: Union[ManifestV10, ManifestV11, ManifestV12] +DbtBouncerModelBase = Union[ModelNode_v10, ModelNode_v11, Nodes4] + + class DbtBouncerModel(BaseModel): """Model for all model nodes in `manifest.json`.""" - model: Union[ModelNode_v10, ModelNode_v11, Nodes4] + model: DbtBouncerModelBase original_file_path: str unique_id: str diff --git a/tests/unit/checks/manifest/test_exposures.py b/tests/unit/checks/manifest/test_exposures.py index c5c928b9..3e817461 100644 --- a/tests/unit/checks/manifest/test_exposures.py +++ b/tests/unit/checks/manifest/test_exposures.py @@ -8,9 +8,13 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Exposures, Nodes4 from dbt_bouncer.checks.manifest.check_exposures import ( - check_exposure_based_on_non_public_models, - check_exposure_based_on_view, + CheckExposureOnNonPublicModels, + CheckExposureOnView, ) +from dbt_bouncer.parsers import DbtBouncerModel, DbtBouncerModelBase # noqa: F401 + +CheckExposureOnNonPublicModels.model_rebuild() +CheckExposureOnView.model_rebuild() @pytest.mark.parametrize( @@ -118,10 +122,11 @@ ) def test_check_exposure_based_on_non_public_models(exposure, models, expectation): with expectation: - check_exposure_based_on_non_public_models( + CheckExposureOnNonPublicModels( exposure=exposure, models=models, - ) + name="check_exposure_based_on_non_public_models", + ).execute() @pytest.mark.parametrize( @@ -322,8 +327,9 @@ def test_check_exposure_based_on_view( expectation, ): with expectation: - check_exposure_based_on_view( + CheckExposureOnView( exposure=exposure, materializations_to_include=materializations_to_include, models=models, - ) + name="check_exposure_based_on_view", + ).execute() From 7a99557e5c0b1152d31322c10e60c22cb2c24b1b Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 13:50:01 +0200 Subject: [PATCH 06/17] Moving Lineage to new method --- dbt-bouncer-example.yml | 28 ++--- .../checks/manifest/check_lineage.py | 103 ++++++++++-------- src/dbt_bouncer/config_file_validator.py | 1 + src/dbt_bouncer/runner.py | 1 - tests/unit/checks/manifest/test_lineage.py | 26 +++-- 5 files changed, 89 insertions(+), 70 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index a9cfddcc..3f0bbe9b 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -39,20 +39,20 @@ manifest_checks: - name: check_exposure_based_on_non_public_models - name: check_exposure_based_on_view -# # Lineage -# - name: check_lineage_permitted_upstream_models -# include: ^models/staging -# upstream_path_pattern: $^ -# - name: check_lineage_permitted_upstream_models -# include: ^models/intermediate -# upstream_path_pattern: ^models/staging|^models/intermediate -# - name: check_lineage_permitted_upstream_models -# include: ^models/marts -# upstream_path_pattern: ^models/staging|^models/intermediate -# - name: check_lineage_seed_cannot_be_used -# include: ^models/intermediate|^models/marts -# - name: check_lineage_source_cannot_be_used -# include: ^models/intermediate|^models/marts + # Lineage + - name: check_lineage_permitted_upstream_models + include: ^models/staging + upstream_path_pattern: $^ + - name: check_lineage_permitted_upstream_models + include: ^models/intermediate + upstream_path_pattern: ^models/staging|^models/intermediate + - name: check_lineage_permitted_upstream_models + include: ^models/marts + upstream_path_pattern: ^models/staging|^models/intermediate + - name: check_lineage_seed_cannot_be_used + include: ^models/intermediate|^models/marts + - name: check_lineage_source_cannot_be_used + include: ^models/intermediate|^models/marts # # Macros # - name: check_macro_arguments_description_populated diff --git a/src/dbt_bouncer/checks/manifest/check_lineage.py b/src/dbt_bouncer/checks/manifest/check_lineage.py index 805f6ae6..a7e5e9d8 100644 --- a/src/dbt_bouncer/checks/manifest/check_lineage.py +++ b/src/dbt_bouncer/checks/manifest/check_lineage.py @@ -6,29 +6,25 @@ from dbt_bouncer.check_base import BaseCheck if TYPE_CHECKING: - from dbt_bouncer.parsers import DbtBouncerManifest, DbtBouncerModel + from dbt_bouncer.parsers import ( + DbtBouncerManifest, + DbtBouncerModelBase, + ) +from pydantic import Field -class CheckLineagePermittedUpstreamModels(BaseCheck): - name: Literal["check_lineage_permitted_upstream_models"] - upstream_path_pattern: str - -def check_lineage_permitted_upstream_models( - manifest_obj: "DbtBouncerManifest", - model: "DbtBouncerModel", - models: List["DbtBouncerModel"], - upstream_path_pattern: str, - **kwargs, -) -> None: +class CheckLineagePermittedUpstreamModels(BaseCheck): """Upstream models must have a path that matches the provided `upstream_path_pattern`. Parameters: - manifest_obj (DbtBouncerManifest): The manifest object. - model (DbtBouncerModel): The DbtBouncerModel object to check. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. upstream_path_pattern (str): Regexp pattern to match the upstream model(s) path. + Receives: + manifest_obj (DbtBouncerManifest): The manifest object. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. @@ -49,32 +45,39 @@ def check_lineage_permitted_upstream_models( ``` """ - upstream_models = [ - x - for x in model.depends_on.nodes - if x.split(".")[0] == "model" - and x.split(".")[1] == manifest_obj.manifest.metadata.project_name - ] - not_permitted_upstream_models = [ - upstream_model - for upstream_model in upstream_models - if re.compile(upstream_path_pattern.strip()).match( - next(m for m in models if m.unique_id == upstream_model).original_file_path, - ) - is None - ] - assert not not_permitted_upstream_models, f"`{model.name}` references upstream models that are not permitted: {[m.split('.')[-1] for m in not_permitted_upstream_models]}." + manifest_obj: "DbtBouncerManifest" = Field(default=None) + model: "DbtBouncerModelBase" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_lineage_permitted_upstream_models"] + upstream_path_pattern: str -class CheckLineageSeedCannotBeUsed(BaseCheck): - name: Literal["check_lineage_seed_cannot_be_used"] + def execute(self) -> None: + """Execute the check.""" + upstream_models = [ + x + for x in self.model.depends_on.nodes + if x.split(".")[0] == "model" + and x.split(".")[1] == self.manifest_obj.manifest.metadata.project_name + ] + not_permitted_upstream_models = [ + upstream_model + for upstream_model in upstream_models + if re.compile(self.upstream_path_pattern.strip()).match( + next( + m for m in self.models if m.unique_id == upstream_model + ).original_file_path, + ) + is None + ] + assert not not_permitted_upstream_models, f"`{self.model.name}` references upstream models that are not permitted: {[m.split('.')[-1] for m in not_permitted_upstream_models]}." -def check_lineage_seed_cannot_be_used(model: "DbtBouncerModel", **kwargs) -> None: +class CheckLineageSeedCannotBeUsed(BaseCheck): """Seed cannot be referenced in models with a path that matches the specified `include` config. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -89,20 +92,22 @@ def check_lineage_seed_cannot_be_used(model: "DbtBouncerModel", **kwargs) -> Non ``` """ - assert not [ - x for x in model.depends_on.nodes if x.split(".")[0] == "seed" - ], f"`{model.name}` references a seed even though this is not permitted." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_lineage_seed_cannot_be_used"] -class CheckLineageSourceCannotBeUsed(BaseCheck): - name: Literal["check_lineage_source_cannot_be_used"] + def execute(self) -> None: + """Execute the check.""" + assert not [ + x for x in self.model.depends_on.nodes if x.split(".")[0] == "seed" + ], f"`{self.model.name}` references a seed even though this is not permitted." -def check_lineage_source_cannot_be_used(model: "DbtBouncerModel", **kwargs) -> None: +class CheckLineageSourceCannotBeUsed(BaseCheck): """Sources cannot be referenced in models with a path that matches the specified `include` config. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -117,6 +122,12 @@ def check_lineage_source_cannot_be_used(model: "DbtBouncerModel", **kwargs) -> N ``` """ - assert not [ - x for x in model.depends_on.nodes if x.split(".")[0] == "source" - ], f"`{model.name}` references a source even though this is not permitted." + + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_lineage_source_cannot_be_used"] + + def execute(self) -> None: + """Execute the check.""" + assert not [ + x for x in self.model.depends_on.nodes if x.split(".")[0] == "source" + ], f"`{self.model.name}` references a source even though this is not permitted." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index de8158c7..0c057c7c 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -186,6 +186,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerManifest, DbtBouncerModel, DbtBouncerModelBase, DbtBouncerRunResult, diff --git a/src/dbt_bouncer/runner.py b/src/dbt_bouncer/runner.py index 52f904d7..a81c4072 100644 --- a/src/dbt_bouncer/runner.py +++ b/src/dbt_bouncer/runner.py @@ -105,7 +105,6 @@ def runner( iterate_over_value = valid_iterate_over_values.intersection( set(check.__annotations__.keys()), ) - if len(iterate_over_value) == 1: iterate_value = next(iter(iterate_over_value)) for i in locals()[f"{iterate_value}s"]: diff --git a/tests/unit/checks/manifest/test_lineage.py b/tests/unit/checks/manifest/test_lineage.py index 66ccb7c7..92062efe 100644 --- a/tests/unit/checks/manifest/test_lineage.py +++ b/tests/unit/checks/manifest/test_lineage.py @@ -8,10 +8,15 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4 from dbt_bouncer.checks.manifest.check_lineage import ( - check_lineage_permitted_upstream_models, - check_lineage_seed_cannot_be_used, - check_lineage_source_cannot_be_used, + CheckLineagePermittedUpstreamModels, + CheckLineageSeedCannotBeUsed, + CheckLineageSourceCannotBeUsed, ) +from dbt_bouncer.parsers import DbtBouncerManifest, DbtBouncerModelBase # noqa: F401 + +CheckLineagePermittedUpstreamModels.model_rebuild() +CheckLineageSeedCannotBeUsed.model_rebuild() +CheckLineageSourceCannotBeUsed.model_rebuild() @pytest.mark.parametrize( @@ -160,12 +165,13 @@ def test_check_lineage_permitted_upstream_models( expectation, ): with expectation: - check_lineage_permitted_upstream_models( + CheckLineagePermittedUpstreamModels( manifest_obj=manifest_obj, model=model, models=models, + name="check_lineage_permitted_upstream_models", upstream_path_pattern=upstream_path_pattern, - ) + ).execute() @pytest.mark.parametrize( @@ -225,9 +231,10 @@ def test_check_lineage_permitted_upstream_models( ) def test_check_lineage_seed_cannot_be_used(model, expectation): with expectation: - check_lineage_seed_cannot_be_used( + CheckLineageSeedCannotBeUsed( model=model, - ) + name="check_lineage_seed_cannot_be_used", + ).execute() @pytest.mark.parametrize( @@ -287,6 +294,7 @@ def test_check_lineage_seed_cannot_be_used(model, expectation): ) def test_check_lineage_source_cannot_be_used(model, expectation): with expectation: - check_lineage_source_cannot_be_used( + CheckLineageSourceCannotBeUsed( model=model, - ) + name="check_lineage_source_cannot_be_used", + ).execute() From 56cba6f0f27501195ef358564cb5a6d64c88fa4f Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 14:03:05 +0200 Subject: [PATCH 07/17] Moving Macros to new method --- dbt-bouncer-example.yml | 16 +- .../checks/manifest/check_macros.py | 248 +++++++++--------- src/dbt_bouncer/config_file_validator.py | 1 + tests/unit/checks/manifest/test_macros.py | 50 ++-- 4 files changed, 170 insertions(+), 145 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 3f0bbe9b..40e67920 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -54,14 +54,14 @@ manifest_checks: - name: check_lineage_source_cannot_be_used include: ^models/intermediate|^models/marts -# # Macros -# - name: check_macro_arguments_description_populated -# - name: check_macro_code_does_not_contain_regexp_pattern -# regexp_pattern: .*[i][f][n][u][l][l].* -# - name: check_macro_description_populated -# - name: check_macro_max_number_of_lines -# - name: check_macro_name_matches_file_name -# - name: check_macro_property_file_location + # Macros + - name: check_macro_arguments_description_populated + - name: check_macro_code_does_not_contain_regexp_pattern + regexp_pattern: .*[i][f][n][u][l][l].* + - name: check_macro_description_populated + - name: check_macro_max_number_of_lines + - name: check_macro_name_matches_file_name + - name: check_macro_property_file_location # # Metadata # - name: check_project_name diff --git a/src/dbt_bouncer/checks/manifest/check_macros.py b/src/dbt_bouncer/checks/manifest/check_macros.py index 94f34b15..ae20e809 100644 --- a/src/dbt_bouncer/checks/manifest/check_macros.py +++ b/src/dbt_bouncer/checks/manifest/check_macros.py @@ -24,13 +24,9 @@ class TagExtension(StandaloneTag): class CheckMacroArgumentsDescriptionPopulated(BaseCheck): - name: Literal["check_macro_arguments_description_populated"] - - -def check_macro_arguments_description_populated(macro: "Macros", **kwargs) -> None: """Macro arguments must have a populated description. - Parameters: + Receives: macro (Macros): The Macros object to check. Other Parameters: @@ -51,66 +47,65 @@ def check_macro_arguments_description_populated(macro: "Macros", **kwargs) -> No ``` """ - environment = jinja2.Environment(autoescape=True, extensions=[TagExtension]) - ast = environment.parse(macro.macro_sql) - - if hasattr(ast.body[0], "args"): - # Assume macro is a "true" macro - macro_arguments = [a.name for a in ast.body[0].args] - else: - if "materialization" in [ - x.value.value - for x in ast.body[0].nodes[0].kwargs # type: ignore[attr-defined] - if isinstance(x.value, jinja2.nodes.Const) - ]: - # Materializations don't have arguments - macro_arguments = [] - else: - # Macro is a test - test_macro = next( - x - for x in ast.body - if not isinstance(x.nodes[0], jinja2.nodes.Call) # type: ignore[attr-defined] - ) - macro_arguments = [ - x.name - for x in test_macro.nodes # type: ignore[attr-defined] - if isinstance(x, jinja2.nodes.Name) - ] - - # macro_arguments: List of args parsed from macro SQL - # macro.arguments: List of args manually added to the properties file - non_complying_args = [] - for arg in macro_arguments: - macro_doc_raw = [x for x in macro.arguments if x.name == arg] - if macro_doc_raw == [] or ( - arg not in [x.name for x in macro.arguments] - or len(macro_doc_raw[0].description.strip()) <= 4 - ): - non_complying_args.append(arg) + macro: "Macros" = Field(default=None) + name: Literal["check_macro_arguments_description_populated"] - assert ( - non_complying_args == [] - ), f"Macro `{macro.name}` does not have a populated description for the following argument(s): {non_complying_args}." + def execute(self) -> None: + """Execute the check.""" + environment = jinja2.Environment(autoescape=True, extensions=[TagExtension]) + ast = environment.parse(self.macro.macro_sql) + if hasattr(ast.body[0], "args"): + # Assume macro is a "true" macro + macro_arguments = [a.name for a in ast.body[0].args] + else: + if "materialization" in [ + x.value.value + for x in ast.body[0].nodes[0].kwargs # type: ignore[attr-defined] + if isinstance(x.value, jinja2.nodes.Const) + ]: + # Materializations don't have arguments + macro_arguments = [] + else: + # Macro is a test + test_macro = next( + x + for x in ast.body + if not isinstance(x.nodes[0], jinja2.nodes.Call) # type: ignore[attr-defined] + ) + macro_arguments = [ + x.name + for x in test_macro.nodes # type: ignore[attr-defined] + if isinstance(x, jinja2.nodes.Name) + ] + + # macro_arguments: List of args parsed from macro SQL + # macro.arguments: List of args manually added to the properties file + + non_complying_args = [] + for arg in macro_arguments: + macro_doc_raw = [x for x in self.macro.arguments if x.name == arg] + if macro_doc_raw == [] or ( + arg not in [x.name for x in self.macro.arguments] + or len(macro_doc_raw[0].description.strip()) <= 4 + ): + non_complying_args.append(arg) -class CheckMacroCodeDoesNotContainRegexpPattern(BaseCheck): - name: Literal["check_macro_code_does_not_contain_regexp_pattern"] - regexp_pattern: str + assert ( + non_complying_args == [] + ), f"Macro `{self.macro.name}` does not have a populated description for the following argument(s): {non_complying_args}." -def check_macro_code_does_not_contain_regexp_pattern( - macro: "Macros", - regexp_pattern: str, - **kwargs, -) -> None: +class CheckMacroCodeDoesNotContainRegexpPattern(BaseCheck): """The raw code for a macro must not match the specified regexp pattern. Parameters: - macro (Macros): The Macros object to check. regexp_pattern (str): The regexp pattern that should not be matched by the macro code. + Receives: + macro (Macros): The Macros object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the macro path. Macro paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the macro path. Only macro paths that match the pattern will be checked. @@ -125,20 +120,25 @@ def check_macro_code_does_not_contain_regexp_pattern( ``` """ - assert ( - re.compile(regexp_pattern.strip(), flags=re.DOTALL).match(macro.macro_sql) - is None - ), f"Macro `{macro.name}` contains a banned string: `{regexp_pattern.strip()}`." + macro: "Macros" = Field(default=None) + name: Literal["check_macro_code_does_not_contain_regexp_pattern"] + regexp_pattern: str -class CheckMacroDescriptionPopulated(BaseCheck): - name: Literal["check_macro_description_populated"] + def execute(self) -> None: + """Execute the check.""" + assert ( + re.compile(self.regexp_pattern.strip(), flags=re.DOTALL).match( + self.macro.macro_sql + ) + is None + ), f"Macro `{self.macro.name}` contains a banned string: `{self.regexp_pattern.strip()}`." -def check_macro_description_populated(macro: "Macros", **kwargs) -> None: +class CheckMacroDescriptionPopulated(BaseCheck): """Macros must have a populated description. - Parameters: + Receives: macro (Macros): The Macros object to check. Other Parameters: @@ -159,27 +159,26 @@ def check_macro_description_populated(macro: "Macros", **kwargs) -> None: ``` """ - assert ( - len(macro.description.strip()) > 4 - ), f"Macro `{macro.name}` does not have a populated description." + macro: "Macros" = Field(default=None) + name: Literal["check_macro_description_populated"] -class CheckMacroMaxNumberOfLines(BaseCheck): - name: Literal["check_macro_max_number_of_lines"] - max_number_of_lines: int = Field(default=50) + def execute(self) -> None: + """Execute the check.""" + assert ( + len(self.macro.description.strip()) > 4 + ), f"Macro `{self.macro.name}` does not have a populated description." -def check_macro_max_number_of_lines( - macro: "Macros", - max_number_of_lines: int = 50, - **kwargs, -) -> None: +class CheckMacroMaxNumberOfLines(BaseCheck): """Macros may not have more than the specified number of lines. Parameters: - macro (Macros): The Macros object to check. max_number_of_lines (int): The maximum number of permitted lines. + Receives: + macro (Macros): The Macros object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the macro path. Macro paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the macro path. Only macro paths that match the pattern will be checked. @@ -197,23 +196,26 @@ def check_macro_max_number_of_lines( ``` """ - actual_number_of_lines = macro.macro_sql.count("\n") + 1 - assert ( - actual_number_of_lines <= max_number_of_lines - ), f"Macro `{macro.name}` has {actual_number_of_lines} lines, this is more than the maximum permitted number of lines ({max_number_of_lines})." + macro: "Macros" = Field(default=None) + name: Literal["check_macro_max_number_of_lines"] + max_number_of_lines: int = Field(default=50) + def execute(self) -> None: + """Execute the check.""" + actual_number_of_lines = self.macro.macro_sql.count("\n") + 1 -class CheckMacroNameMatchesFileName(BaseCheck): - name: Literal["check_macro_name_matches_file_name"] + assert ( + actual_number_of_lines <= self.max_number_of_lines + ), f"Macro `{self.macro.name}` has {actual_number_of_lines} lines, this is more than the maximum permitted number of lines ({self.max_number_of_lines})." -def check_macro_name_matches_file_name(macro: "Macros", **kwargs) -> None: +class CheckMacroNameMatchesFileName(BaseCheck): """Macros names must be the same as the file they are contained in. Generic tests are also macros, however to document these tests the "name" value must be preceded with "test_". - Parameters: + Receives: macro (Macros): The Macros object to check. Other Parameters: @@ -228,24 +230,28 @@ def check_macro_name_matches_file_name(macro: "Macros", **kwargs) -> None: ``` """ - if macro.name.startswith("test_"): - assert ( - macro.name[5:] == macro.original_file_path.split("/")[-1].split(".")[0] - ), f"Macro `{macro.unique_id}` is not in a file named `{macro.name[5:]}.sql`." - else: - assert ( - macro.name == macro.original_file_path.split("/")[-1].split(".")[0] - ), f"Macro `{macro.name}` is not in a file of the same name." + macro: "Macros" = Field(default=None) + name: Literal["check_macro_name_matches_file_name"] -class CheckMacroPropertyFileLocation(BaseCheck): - name: Literal["check_macro_property_file_location"] + def execute(self) -> None: + """Execute the check.""" + if self.macro.name.startswith("test_"): + assert ( + self.macro.name[5:] + == self.macro.original_file_path.split("/")[-1].split(".")[0] + ), f"Macro `{self.macro.unique_id}` is not in a file named `{self.macro.name[5:]}.sql`." + else: + assert ( + self.macro.name + == self.macro.original_file_path.split("/")[-1].split(".")[0] + ), f"Macro `{self.macro.name}` is not in a file of the same name." -def check_macro_property_file_location(macro: "Macros", **kwargs) -> None: +class CheckMacroPropertyFileLocation(BaseCheck): """Macro properties files must follow the guidance provided by dbt [here](https://docs.getdbt.com/best-practices/how-we-structure/5-the-rest-of-the-project#how-we-use-the-other-folders). - Parameters: + Receives: macro (Macros): The Macros object to check. Other Parameters: @@ -260,28 +266,34 @@ def check_macro_property_file_location(macro: "Macros", **kwargs) -> None: ``` """ - expected_substr = "_".join(macro.original_file_path[6:].split("/")[:-1]) - - assert ( - macro.patch_path is not None - ), f"Macro `{macro.name}` is not defined in a `.yml` properties file." - properties_yml_name = macro.patch_path.split("/")[-1] - - if macro.original_file_path.startswith( - "tests/", - ): # Do not check generic tests (which are also macros) - pass - elif expected_substr == "": # i.e. macro in ./macros - assert ( - properties_yml_name == "_macros.yml" - ), f"The properties file for `{macro.name}` (`{properties_yml_name}`) should be `_macros.yml`." - else: - assert properties_yml_name.startswith( - "_", - ), f"The properties file for `{macro.name}` (`{properties_yml_name}`) does not start with an underscore." + + macro: "Macros" = Field(default=None) + name: Literal["check_macro_property_file_location"] + + def execute(self) -> None: + """Execute the check.""" + expected_substr = "_".join(self.macro.original_file_path[6:].split("/")[:-1]) + assert ( - expected_substr in properties_yml_name - ), f"The properties file for `{macro.name}` (`{properties_yml_name}`) does not contain the expected substring (`{expected_substr}`)." - assert properties_yml_name.endswith( - "__macros.yml", - ), f"The properties file for `{macro.name.name}` (`{properties_yml_name}`) does not end with `__macros.yml`." + self.macro.patch_path is not None + ), f"Macro `{self.macro.name}` is not defined in a `.yml` properties file." + properties_yml_name = self.macro.patch_path.split("/")[-1] + + if self.macro.original_file_path.startswith( + "tests/", + ): # Do not check generic tests (which are also macros) + pass + elif expected_substr == "": # i.e. macro in ./macros + assert ( + properties_yml_name == "_macros.yml" + ), f"The properties file for `{self.macro.name}` (`{properties_yml_name}`) should be `_macros.yml`." + else: + assert properties_yml_name.startswith( + "_", + ), f"The properties file for `{self.macro.name}` (`{properties_yml_name}`) does not start with an underscore." + assert ( + expected_substr in properties_yml_name + ), f"The properties file for `{self.macro.name}` (`{properties_yml_name}`) does not contain the expected substring (`{expected_substr}`)." + assert properties_yml_name.endswith( + "__macros.yml", + ), f"The properties file for `{self.macro.name.name}` (`{properties_yml_name}`) does not end with `__macros.yml`." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index 0c057c7c..c4157a74 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -181,6 +181,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": # Rebuild the model to ensure all fields are present from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( Exposures, # noqa: F401 + Macros, # noqa: F401 ) import dbt_bouncer.checks # noqa: F401 diff --git a/tests/unit/checks/manifest/test_macros.py b/tests/unit/checks/manifest/test_macros.py index ae36c84e..ddba286f 100644 --- a/tests/unit/checks/manifest/test_macros.py +++ b/tests/unit/checks/manifest/test_macros.py @@ -8,14 +8,21 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Macros from dbt_bouncer.checks.manifest.check_macros import ( - check_macro_arguments_description_populated, - check_macro_code_does_not_contain_regexp_pattern, - check_macro_description_populated, - check_macro_max_number_of_lines, - check_macro_name_matches_file_name, - check_macro_property_file_location, + CheckMacroArgumentsDescriptionPopulated, + CheckMacroCodeDoesNotContainRegexpPattern, + CheckMacroDescriptionPopulated, + CheckMacroMaxNumberOfLines, + CheckMacroNameMatchesFileName, + CheckMacroPropertyFileLocation, ) +CheckMacroArgumentsDescriptionPopulated.model_rebuild() +CheckMacroCodeDoesNotContainRegexpPattern.model_rebuild() +CheckMacroDescriptionPopulated.model_rebuild() +CheckMacroMaxNumberOfLines.model_rebuild() +CheckMacroNameMatchesFileName.model_rebuild() +CheckMacroPropertyFileLocation.model_rebuild() + @pytest.mark.parametrize( ("macro", "expectation"), @@ -156,9 +163,9 @@ ) def test_check_macro_arguments_description_populated(macro, expectation): with expectation: - check_macro_arguments_description_populated( - macro=macro, - ) + CheckMacroArgumentsDescriptionPopulated( + macro=macro, name="check_macro_arguments_description_populated" + ).execute() @pytest.mark.parametrize( @@ -222,10 +229,11 @@ def test_check_macro_code_does_not_contain_regexp_pattern( expectation, ): with expectation: - check_macro_code_does_not_contain_regexp_pattern( + CheckMacroCodeDoesNotContainRegexpPattern( macro=macro, + name="check_macro_code_does_not_contain_regexp_pattern", regexp_pattern=regexp_pattern, - ) + ).execute() @pytest.mark.parametrize( @@ -296,9 +304,10 @@ def test_check_macro_code_does_not_contain_regexp_pattern( ) def test_check_macro_description_populated(macro, expectation): with expectation: - check_macro_description_populated( + CheckMacroDescriptionPopulated( macro=macro, - ) + name="check_macro_description_populated", + ).execute() @pytest.mark.parametrize( @@ -338,10 +347,11 @@ def test_check_macro_description_populated(macro, expectation): ) def test_check_macro_max_number_of_lines(max_number_of_lines, macro, expectation): with expectation: - check_macro_max_number_of_lines( + CheckMacroMaxNumberOfLines( max_number_of_lines=max_number_of_lines, macro=macro, - ) + name="check_macro_max_number_of_lines", + ).execute() @pytest.mark.parametrize( @@ -407,9 +417,10 @@ def test_check_macro_max_number_of_lines(max_number_of_lines, macro, expectation ) def test_check_macro_name_matches_file_name(macro, expectation): with expectation: - check_macro_name_matches_file_name( + CheckMacroNameMatchesFileName( macro=macro, - ) + name="check_macro_name_matches_file_name", + ).execute() @pytest.mark.parametrize( @@ -509,6 +520,7 @@ def test_check_macro_name_matches_file_name(macro, expectation): ) def test_check_macro_property_file_location(macro, expectation): with expectation: - check_macro_property_file_location( + CheckMacroPropertyFileLocation( macro=macro, - ) + name="check_macro_property_file_location", + ).execute() From 87d84837ee7f06245b839c0476cdca6022b851eb Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 14:11:28 +0200 Subject: [PATCH 08/17] Moving Unit Tests to new method --- dbt-bouncer-example.yml | 6 +- .../checks/manifest/check_unit_tests.py | 86 ++++++++++--------- src/dbt_bouncer/config_file_validator.py | 1 + tests/unit/checks/manifest/test_unit_tests.py | 18 ++-- 4 files changed, 62 insertions(+), 49 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 40e67920..fc196084 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -151,9 +151,9 @@ manifest_checks: # - name: check_source_used_by_models_in_same_directory # - name: check_source_used_by_only_one_model -# # Unit tests -# - name: check_unit_test_expect_format -# - name: check_unit_test_given_formats + # Unit tests + - name: check_unit_test_expect_format + - name: check_unit_test_given_formats run_results_checks: # Not a good idea, here for demonstration purposes only diff --git a/src/dbt_bouncer/checks/manifest/check_unit_tests.py b/src/dbt_bouncer/checks/manifest/check_unit_tests.py index b336fef3..08dfbb25 100644 --- a/src/dbt_bouncer/checks/manifest/check_unit_tests.py +++ b/src/dbt_bouncer/checks/manifest/check_unit_tests.py @@ -21,18 +21,6 @@ class CheckUnitTestExpectFormats(BaseCheck): - name: Literal["check_unit_test_expect_format"] - permitted_formats: List[Literal["csv", "dict", "sql"]] = Field( - default=["csv", "dict", "sql"], - ) - - -def check_unit_test_expect_format( - manifest_obj: "DbtBouncerManifest", - unit_test: "UnitTests", - permitted_formats: List[Literal["csv", "dict", "sql"]] = ["csv", "dict", "sql"], # noqa: B006 - **kwargs, -) -> None: """Unit tests can only use the specified formats. !!! warning @@ -40,8 +28,10 @@ def check_unit_test_expect_format( This check is only supported for dbt 1.8.0 and above. Parameters: - manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. permitted_formats (Optional[List[Literal["csv", "dict", "sql"]]]): A list of formats that are allowed to be used for `expect` input in a unit test. + + Receives: + manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. unit_test (UnitTests): The UnitTests object to check. Other Parameters: @@ -58,29 +48,30 @@ def check_unit_test_expect_format( ``` """ - if semver.Version.parse(manifest_obj.manifest.metadata.dbt_version) >= "1.8.0": - assert ( - unit_test.expect.format.value in permitted_formats - ), f"Unit test `{unit_test.name}` has an `expect` format that is not permitted. Permitted formats are: {permitted_formats}." - else: - logging.warning( - "The `check_unit_test_expect_format` check is only supported for dbt 1.8.0 and above.", - ) - -class CheckUnitTestGivenFormats(BaseCheck): - name: Literal["check_unit_test_given_formats"] + manifest_obj: "DbtBouncerManifest" = Field(default=None) + name: Literal["check_unit_test_expect_format"] permitted_formats: List[Literal["csv", "dict", "sql"]] = Field( default=["csv", "dict", "sql"], ) + unit_test: "UnitTests" = Field(default=None) + + def execute(self) -> None: + """Execute the check.""" + if ( + semver.Version.parse(self.manifest_obj.manifest.metadata.dbt_version) + >= "1.8.0" + ): + assert ( + self.unit_test.expect.format.value in self.permitted_formats + ), f"Unit test `{self.unit_test.name}` has an `expect` format that is not permitted. Permitted formats are: {self.permitted_formats}." + else: + logging.warning( + "The `check_unit_test_expect_format` check is only supported for dbt 1.8.0 and above.", + ) -def check_unit_test_given_formats( - manifest_obj: "DbtBouncerManifest", - unit_test: "UnitTests", - permitted_formats: List[Literal["csv", "dict", "sql"]] = ["csv", "dict", "sql"], # noqa: B006 - **kwargs, -) -> None: +class CheckUnitTestGivenFormats(BaseCheck): """Unit tests can only use the specified formats. !!! warning @@ -88,8 +79,10 @@ def check_unit_test_given_formats( This check is only supported for dbt 1.8.0 and above. Parameters: - manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. permitted_formats (Optional[List[Literal["csv", "dict", "sql"]]]): A list of formats that are allowed to be used for `expect` input in a unit test. + + Receives: + manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. unit_test (UnitTests): The UnitTests object to check. Other Parameters: @@ -106,12 +99,25 @@ def check_unit_test_given_formats( ``` """ - if semver.Version.parse(manifest_obj.manifest.metadata.dbt_version) >= "1.8.0": - given_formats = [i.format.value for i in unit_test.given] - assert all( - e in permitted_formats for e in given_formats - ), f"Unit test `{unit_test.name}` has given formats which are not permitted. Permitted formats are: {permitted_formats}." - else: - logging.warning( - "The `check_unit_test_given_formats` check is only supported for dbt 1.8.0 and above.", - ) + + manifest_obj: "DbtBouncerManifest" = Field(default=None) + name: Literal["check_unit_test_given_formats"] + permitted_formats: List[Literal["csv", "dict", "sql"]] = Field( + default=["csv", "dict", "sql"], + ) + unit_test: "UnitTests" = Field(default=None) + + def execute(self) -> None: + """Execute the check.""" + if ( + semver.Version.parse(self.manifest_obj.manifest.metadata.dbt_version) + >= "1.8.0" + ): + given_formats = [i.format.value for i in self.unit_test.given] + assert all( + e in self.permitted_formats for e in given_formats + ), f"Unit test `{self.unit_test.name}` has given formats which are not permitted. Permitted formats are: {self.permitted_formats}." + else: + logging.warning( + "The `check_unit_test_given_formats` check is only supported for dbt 1.8.0 and above.", + ) diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index c4157a74..af371fde 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -182,6 +182,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( Exposures, # noqa: F401 Macros, # noqa: F401 + UnitTests, # noqa: F401 ) import dbt_bouncer.checks # noqa: F401 diff --git a/tests/unit/checks/manifest/test_unit_tests.py b/tests/unit/checks/manifest/test_unit_tests.py index b85e2cac..8853d819 100644 --- a/tests/unit/checks/manifest/test_unit_tests.py +++ b/tests/unit/checks/manifest/test_unit_tests.py @@ -8,9 +8,13 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import UnitTests from dbt_bouncer.checks.manifest.check_unit_tests import ( - check_unit_test_expect_format, - check_unit_test_given_formats, + CheckUnitTestExpectFormats, + CheckUnitTestGivenFormats, ) +from dbt_bouncer.parsers import DbtBouncerManifest # noqa: F401 + +CheckUnitTestExpectFormats.model_rebuild() +CheckUnitTestGivenFormats.model_rebuild() @pytest.mark.parametrize( @@ -66,11 +70,12 @@ def test_check_unit_test_expect_format( expectation, ): with expectation: - check_unit_test_expect_format( + CheckUnitTestExpectFormats( manifest_obj=manifest_obj, + name="check_unit_test_expect_format", permitted_formats=permitted_formats, unit_test=unit_test, - ) + ).execute() @pytest.mark.parametrize( @@ -132,8 +137,9 @@ def test_check_unit_test_given_formats( expectation, ): with expectation: - check_unit_test_given_formats( + CheckUnitTestGivenFormats( manifest_obj=manifest_obj, + name="check_unit_test_given_formats", permitted_formats=permitted_formats, unit_test=unit_test, - ) + ).execute() From 92ee1de2234cc69d3b30f03a90051ee139d8a1d5 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 14:45:52 +0200 Subject: [PATCH 09/17] Moving Sources to new method --- dbt-bouncer-example.yml | 42 +-- .../checks/manifest/check_sources.py | 306 +++++++++--------- src/dbt_bouncer/config_file_validator.py | 2 + src/dbt_bouncer/parsers.py | 5 +- tests/unit/checks/manifest/test_sources.py | 88 +++-- 5 files changed, 243 insertions(+), 200 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index fc196084..c1767f96 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -129,27 +129,27 @@ manifest_checks: # # Semantic models # - name: check_semantic_model_based_on_non_public_models -# # Sources -# - name: check_source_description_populated -# - name: check_source_freshness_populated -# - name: check_source_loader_populated -# - name: check_source_has_meta_keys -# keys: -# - contact: -# - email -# - slack -# - owner -# - name: check_source_has_tags -# tags: -# - example_tag -# - name: check_source_names -# source_name_pattern: > -# ^[a-z0-9_]*$ -# - name: check_source_not_orphaned -# include: ^models/staging/crm -# - name: check_source_property_file_location -# - name: check_source_used_by_models_in_same_directory -# - name: check_source_used_by_only_one_model + # Sources + - name: check_source_description_populated + - name: check_source_freshness_populated + - name: check_source_loader_populated + - name: check_source_has_meta_keys + keys: + - contact: + - email + - slack + - owner + - name: check_source_has_tags + tags: + - example_tag + - name: check_source_names + source_name_pattern: > + ^[a-z0-9_]*$ + - name: check_source_not_orphaned + include: ^models/staging/crm + - name: check_source_property_file_location + - name: check_source_used_by_models_in_same_directory + - name: check_source_used_by_only_one_model # Unit tests - name: check_unit_test_expect_format diff --git a/src/dbt_bouncer/checks/manifest/check_sources.py b/src/dbt_bouncer/checks/manifest/check_sources.py index 95a5b90d..629c7eaf 100644 --- a/src/dbt_bouncer/checks/manifest/check_sources.py +++ b/src/dbt_bouncer/checks/manifest/check_sources.py @@ -5,22 +5,23 @@ if TYPE_CHECKING: from dbt_bouncer.checks.common import NestedDict - from dbt_bouncer.parsers import DbtBouncerModel, DbtBouncerSource + from dbt_bouncer.parsers import ( + DbtBouncerModelBase, + DbtBouncerSourceBase, + ) + +from pydantic import Field from dbt_bouncer.check_base import BaseCheck from dbt_bouncer.utils import find_missing_meta_keys class CheckSourceDescriptionPopulated(BaseCheck): - name: Literal["check_source_description_populated"] - - -def check_source_description_populated(source: "DbtBouncerSource", **kwargs) -> None: """Sources must have a populated description. - Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. + Receives: + source (DbtBouncerSourceBase): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -34,20 +35,22 @@ def check_source_description_populated(source: "DbtBouncerSource", **kwargs) -> ``` """ - assert ( - len(source.description.strip()) > 4 - ), f"`{source.source_name}.{source.name}` does not have a populated description." + name: Literal["check_source_description_populated"] + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourceFreshnessPopulated(BaseCheck): - name: Literal["check_source_freshness_populated"] + def execute(self) -> None: + """Execute the check.""" + assert ( + len(self.source.description.strip()) > 4 + ), f"`{self.source.source_name}.{self.source.name}` does not have a populated description." -def check_source_freshness_populated(source: "DbtBouncerSource", **kwargs) -> None: +class CheckSourceFreshnessPopulated(BaseCheck): """Sources must have a populated freshness. - Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. + Receives: + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -61,34 +64,31 @@ def check_source_freshness_populated(source: "DbtBouncerSource", **kwargs) -> No ``` """ - error_msg = ( - f"`{source.source_name}.{source.name}` does not have a populated freshness." - ) - assert source.freshness is not None, error_msg - assert ( - source.freshness.error_after.count is not None - and source.freshness.error_after.period is not None - ) or ( - source.freshness.warn_after.count is not None - and source.freshness.warn_after.period is not None - ), error_msg + name: Literal["check_source_freshness_populated"] + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourceHasMetaKeys(BaseCheck): - keys: "NestedDict" - name: Literal["check_source_has_meta_keys"] + def execute(self) -> None: + """Execute the check.""" + error_msg = f"`{self.source.source_name}.{self.source.name}` does not have a populated freshness." + assert self.source.freshness is not None, error_msg + assert ( + self.source.freshness.error_after.count is not None + and self.source.freshness.error_after.period is not None + ) or ( + self.source.freshness.warn_after.count is not None + and self.source.freshness.warn_after.period is not None + ), error_msg -def check_source_has_meta_keys( - keys: "NestedDict", - source: "DbtBouncerSource", - **kwargs, -) -> None: +class CheckSourceHasMetaKeys(BaseCheck): """The `meta` config for sources must have the specified keys. Parameters: keys (NestedDict): A list (that may contain sub-lists) of required keys. - source (DbtBouncerSource): The DbtBouncerSource object to check. + + Receives: + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -107,29 +107,28 @@ def check_source_has_meta_keys( ``` """ - missing_keys = find_missing_meta_keys( - meta_config=source.meta, - required_keys=keys, - ) - assert ( - missing_keys == [] - ), f"`{source.source_name}.{source.name}` is missing the following keys from the `meta` config: {[x.replace('>>', '') for x in missing_keys]}" + keys: "NestedDict" + name: Literal["check_source_has_meta_keys"] + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourceHasTags(BaseCheck): - name: Literal["check_source_has_tags"] - tags: List[str] + def execute(self) -> None: + """Execute the check.""" + missing_keys = find_missing_meta_keys( + meta_config=self.source.meta, + required_keys=self.keys.model_dump(), + ) + + assert ( + missing_keys == [] + ), f"`{self.source.source_name}.{self.source.name}` is missing the following keys from the `meta` config: {[x.replace('>>', '') for x in missing_keys]}" -def check_source_has_tags( - source: "DbtBouncerSource", - tags: List[str], - **kwargs, -) -> None: +class CheckSourceHasTags(BaseCheck): """Sources must have the specified tags. Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. tags (List[str]): List of tags to check for. Other Parameters: @@ -147,21 +146,22 @@ def check_source_has_tags( ``` """ - missing_tags = [tag for tag in tags if tag not in source.tags] - assert ( - not missing_tags - ), f"`{source.source_name}.{source.name}` is missing required tags: {missing_tags}." + name: Literal["check_source_has_tags"] + source: "DbtBouncerSourceBase" = Field(default=None) + tags: List[str] -class CheckSourceLoaderPopulated(BaseCheck): - name: Literal["check_source_loader_populated"] + def execute(self) -> None: + """Execute the check.""" + missing_tags = [tag for tag in self.tags if tag not in self.source.tags] + assert not missing_tags, f"`{self.source.source_name}.{self.source.name}` is missing required tags: {missing_tags}." -def check_source_loader_populated(source: "DbtBouncerSource", **kwargs) -> None: +class CheckSourceLoaderPopulated(BaseCheck): """Sources must have a populated loader. Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -175,27 +175,26 @@ def check_source_loader_populated(source: "DbtBouncerSource", **kwargs) -> None: ``` """ - assert ( - source.loader != "" - ), f"`{source.source_name}.{source.name}` does not have a populated loader." + name: Literal["check_source_loader_populated"] + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourceNames(BaseCheck): - name: Literal["check_source_names"] - source_name_pattern: str + def execute(self) -> None: + """Execute the check.""" + assert ( + self.source.loader != "" + ), f"`{self.source.source_name}.{self.source.name}` does not have a populated loader." -def check_source_names( - source: "DbtBouncerSource", - source_name_pattern: str, - **kwargs, -) -> None: +class CheckSourceNames(BaseCheck): """Sources must have a name that matches the supplied regex. Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. source_name_pattern (str): Regexp the source name must match. + Receives: + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Only source paths that match the pattern will be checked. @@ -210,25 +209,25 @@ def check_source_names( ``` """ - assert ( - re.compile(source_name_pattern.strip()).match(source.name) is not None - ), f"`{source.source_name}.{source.name}` does not match the supplied regex `({source_name_pattern.strip()})`." + name: Literal["check_source_names"] + source_name_pattern: str + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourceNotOrphaned(BaseCheck): - name: Literal["check_source_not_orphaned"] + def execute(self) -> None: + """Execute the check.""" + assert ( + re.compile(self.source_name_pattern.strip()).match(self.source.name) + is not None + ), f"`{self.source.source_name}.{self.source.name}` does not match the supplied regex `({self.source_name_pattern.strip()})`." -def check_source_not_orphaned( - models: List["DbtBouncerModel"], - source: "DbtBouncerSource", - **kwargs, -) -> None: +class CheckSourceNotOrphaned(BaseCheck): """Sources must be referenced in at least one model. - Parameters: - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. - source (DbtBouncerSource): The DbtBouncerSource object to check. + Receives: + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -242,21 +241,26 @@ def check_source_not_orphaned( ``` """ - num_refs = sum(source.unique_id in model.depends_on.nodes for model in models) - assert ( - num_refs >= 1 - ), f"Source `{source.source_name}.{source.name}` is orphaned, i.e. not referenced by any model." + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_source_not_orphaned"] + source: "DbtBouncerSourceBase" = Field(default=None) -class CheckSourcePropertyFileLocation(BaseCheck): - name: Literal["check_source_property_file_location"] + def execute(self) -> None: + """Execute the check.""" + num_refs = sum( + self.source.unique_id in model.depends_on.nodes for model in self.models + ) + assert ( + num_refs >= 1 + ), f"Source `{self.source.source_name}.{self.source.name}` is orphaned, i.e. not referenced by any model." -def check_source_property_file_location(source: "DbtBouncerSource", **kwargs) -> None: +class CheckSourcePropertyFileLocation(BaseCheck): """Source properties files must follow the guidance provided by dbt [here](https://docs.getdbt.com/best-practices/how-we-structure/1-guide-overview). - Parameters: - source (DbtBouncerSource): The DbtBouncerSource object to check. + Receives: + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -270,42 +274,40 @@ def check_source_property_file_location(source: "DbtBouncerSource", **kwargs) -> ``` """ - path_cleaned = source.original_file_path.replace("models/staging", "") - expected_substring = "_".join(path_cleaned.split("/")[:-1]) - - assert path_cleaned.split( - "/", - )[ - -1 - ].startswith( - "_", - ), f"The properties file for `{source.source_name}.{source.name}` (`{path_cleaned}`) does not start with an underscore." - assert ( - expected_substring in path_cleaned - ), f"The properties file for `{source.source_name}.{source.name}` (`{path_cleaned}`) does not contain the expected substring (`{expected_substring}`)." - assert path_cleaned.split( - "/", - )[ - -1 - ].endswith( - "__sources.yml", - ), f"The properties file for `{source.source_name}.{source.name}` (`{path_cleaned}`) does not end with `__sources.yml`." - -class CheckSourceUsedByModelsInSameDirectory(BaseCheck): - name: Literal["check_source_used_by_models_in_same_directory"] + name: Literal["check_source_property_file_location"] + source: "DbtBouncerSourceBase" = Field(default=None) + + def execute(self) -> None: + """Execute the check.""" + path_cleaned = self.source.original_file_path.replace("models/staging", "") + expected_substring = "_".join(path_cleaned.split("/")[:-1]) + + assert path_cleaned.split( + "/", + )[ + -1 + ].startswith( + "_", + ), f"The properties file for `{self.source.source_name}.{self.source.name}` (`{path_cleaned}`) does not start with an underscore." + assert ( + expected_substring in path_cleaned + ), f"The properties file for `{self.source.source_name}.{self.source.name}` (`{path_cleaned}`) does not contain the expected substring (`{expected_substring}`)." + assert path_cleaned.split( + "/", + )[ + -1 + ].endswith( + "__sources.yml", + ), f"The properties file for `{self.source.source_name}.{self.source.name}` (`{path_cleaned}`) does not end with `__sources.yml`." -def check_source_used_by_models_in_same_directory( - models: List["DbtBouncerModel"], - source: "DbtBouncerSource", - **kwargs, -) -> None: +class CheckSourceUsedByModelsInSameDirectory(BaseCheck): """Sources can only be referenced by models that are located in the same directory where the source is defined. Parameters: - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. - source (DbtBouncerSource): The DbtBouncerSource object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -319,34 +321,33 @@ def check_source_used_by_models_in_same_directory( ``` """ - reffed_models_not_in_same_dir = [] - for model in models: - if ( - source.unique_id in model.depends_on.nodes - and model.original_file_path.split("/")[:-1] - != source.original_file_path.split("/")[:-1] - ): - reffed_models_not_in_same_dir.append(model.unique_id.split(".")[0]) - assert ( - len(reffed_models_not_in_same_dir) == 0 - ), f"Source `{source.source_name}.{source.name}` is referenced by models defined in a different directory: {reffed_models_not_in_same_dir}" + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_source_used_by_models_in_same_directory"] + source: "DbtBouncerSourceBase" = Field(default=None) + def execute(self) -> None: + """Execute the check.""" + reffed_models_not_in_same_dir = [] + for model in self.models: + if ( + self.source.unique_id in model.depends_on.nodes + and model.original_file_path.split("/")[:-1] + != self.source.original_file_path.split("/")[:-1] + ): + reffed_models_not_in_same_dir.append(model.unique_id.split(".")[0]) -class CheckSourceUsedByOnlyOneModel(BaseCheck): - name: Literal["check_source_used_by_only_one_model"] + assert ( + len(reffed_models_not_in_same_dir) == 0 + ), f"Source `{self.source.source_name}.{self.source.name}` is referenced by models defined in a different directory: {reffed_models_not_in_same_dir}" -def check_source_used_by_only_one_model( - models: List["DbtBouncerModel"], - source: "DbtBouncerSource", - **kwargs, -) -> None: +class CheckSourceUsedByOnlyOneModel(BaseCheck): """Each source can be referenced by a maximum of one model. - Parameters: - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. - source (DbtBouncerSource): The DbtBouncerSource object to check. + Receives: + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + source (DbtBouncerSource): The DbtBouncerSourceBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -360,7 +361,16 @@ def check_source_used_by_only_one_model( ``` """ - num_refs = sum(source.unique_id in model.depends_on.nodes for model in models) - assert ( - num_refs <= 1 - ), f"Source `{source.source_name}.{source.name}` is referenced by more than one model." + + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_source_used_by_only_one_model"] + source: "DbtBouncerSourceBase" = Field(default=None) + + def execute(self) -> None: + """Execute the check.""" + num_refs = sum( + self.source.unique_id in model.depends_on.nodes for model in self.models + ) + assert ( + num_refs <= 1 + ), f"Source `{self.source.source_name}.{self.source.name}` is referenced by more than one model." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index af371fde..6157fd3b 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -193,6 +193,8 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerModelBase, DbtBouncerRunResult, DbtBouncerRunResultBase, + DbtBouncerSource, + DbtBouncerSourceBase, DbtBouncerTest, ) diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index 64c0f232..f361c809 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -111,11 +111,14 @@ class DbtBouncerSemanticModel(BaseModel): unique_id: str +DbtBouncerSourceBase = Union[SourceDefinition_v10, SourceDefinition_v11, Sources] + + class DbtBouncerSource(BaseModel): """Model for all source nodes in `manifest.json`.""" original_file_path: str - source: Union[SourceDefinition_v10, SourceDefinition_v11, Sources] + source: DbtBouncerSourceBase unique_id: str diff --git a/tests/unit/checks/manifest/test_sources.py b/tests/unit/checks/manifest/test_sources.py index 86f8210a..929bb45e 100644 --- a/tests/unit/checks/manifest/test_sources.py +++ b/tests/unit/checks/manifest/test_sources.py @@ -7,18 +7,36 @@ warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4, Sources +from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.checks.manifest.check_sources import ( - check_source_description_populated, - check_source_freshness_populated, - check_source_has_meta_keys, - check_source_has_tags, - check_source_loader_populated, - check_source_names, - check_source_not_orphaned, - check_source_property_file_location, - check_source_used_by_models_in_same_directory, - check_source_used_by_only_one_model, + CheckSourceDescriptionPopulated, + CheckSourceFreshnessPopulated, + CheckSourceHasMetaKeys, + CheckSourceHasTags, + CheckSourceLoaderPopulated, + CheckSourceNames, + CheckSourceNotOrphaned, + CheckSourcePropertyFileLocation, + CheckSourceUsedByModelsInSameDirectory, + CheckSourceUsedByOnlyOneModel, ) +from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerModel, + DbtBouncerModelBase, + DbtBouncerSource, + DbtBouncerSourceBase, +) + +CheckSourceDescriptionPopulated.model_rebuild() +CheckSourceFreshnessPopulated.model_rebuild() +CheckSourceHasMetaKeys.model_rebuild() +CheckSourceHasTags.model_rebuild() +CheckSourceLoaderPopulated.model_rebuild() +CheckSourceNames.model_rebuild() +CheckSourceNotOrphaned.model_rebuild() +CheckSourcePropertyFileLocation.model_rebuild() +CheckSourceUsedByModelsInSameDirectory.model_rebuild() +CheckSourceUsedByOnlyOneModel.model_rebuild() @pytest.mark.parametrize( @@ -172,9 +190,10 @@ ) def test_check_source_description_populated(source, expectation): with expectation: - check_source_description_populated( + CheckSourceDescriptionPopulated( + name="check_source_description_populated", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -305,9 +324,10 @@ def test_check_source_description_populated(source, expectation): ) def test_check_source_freshness_populated(source, expectation): with expectation: - check_source_freshness_populated( + CheckSourceFreshnessPopulated( + name="check_source_freshness_populated", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -357,9 +377,10 @@ def test_check_source_freshness_populated(source, expectation): ) def test_check_source_loader_populated(source, expectation): with expectation: - check_source_loader_populated( + CheckSourceLoaderPopulated( + name="check_source_loader_populated", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -504,10 +525,11 @@ def test_check_source_loader_populated(source, expectation): ) def test_check_source_has_meta_keys(keys, source, expectation): with expectation: - check_source_has_meta_keys( + CheckSourceHasMetaKeys( keys=keys, + name="check_source_has_meta_keys", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -561,10 +583,11 @@ def test_check_source_has_meta_keys(keys, source, expectation): ) def test_check_source_has_tags(source, tags, expectation): with expectation: - check_source_has_tags( + CheckSourceHasTags( + name="check_source_has_tags", source=source, tags=tags, - ) + ).execute() @pytest.mark.parametrize( @@ -618,10 +641,11 @@ def test_check_source_has_tags(source, tags, expectation): ) def test_check_source_names(source_name_pattern, source, expectation): with expectation: - check_source_names( + CheckSourceNames( + name="check_source_names", source_name_pattern=source_name_pattern, source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -794,10 +818,11 @@ def test_check_source_names(source_name_pattern, source, expectation): ) def test_check_source_not_orphaned(models, source, expectation): with expectation: - check_source_not_orphaned( + CheckSourceNotOrphaned( models=models, + name="check_source_not_orphaned", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -912,9 +937,10 @@ def test_check_source_not_orphaned(models, source, expectation): ) def test_check_source_property_file_location(source, expectation): with expectation: - check_source_property_file_location( + CheckSourcePropertyFileLocation( + name="check_source_property_file_location", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -1018,10 +1044,11 @@ def test_check_source_property_file_location(source, expectation): ) def test_check_source_used_by_models_in_same_directory(models, source, expectation): with expectation: - check_source_used_by_models_in_same_directory( + CheckSourceUsedByModelsInSameDirectory( models=models, + name="check_source_used_by_models_in_same_directory", source=source, - ) + ).execute() @pytest.mark.parametrize( @@ -1194,7 +1221,8 @@ def test_check_source_used_by_models_in_same_directory(models, source, expectati ) def test_check_source_used_by_only_one_model(models, source, expectation): with expectation: - check_source_used_by_only_one_model( + CheckSourceUsedByOnlyOneModel( models=models, + name="check_source_used_by_only_one_model", source=source, - ) + ).execute() From a7dd14b3cfbf9e2abd17881681ad24401be1a2bc Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 14:51:39 +0200 Subject: [PATCH 10/17] Moving Semantic Models to new method --- dbt-bouncer-example.yml | 4 +- .../checks/manifest/check_semantic_models.py | 50 ++++++++++--------- src/dbt_bouncer/config_file_validator.py | 2 + src/dbt_bouncer/parsers.py | 7 ++- .../checks/manifest/test_semantic_models.py | 13 +++-- 5 files changed, 47 insertions(+), 29 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index c1767f96..222c3b1d 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -126,8 +126,8 @@ manifest_checks: # - name: check_model_test_coverage # - name: check_model_property_file_location -# # Semantic models -# - name: check_semantic_model_based_on_non_public_models + # Semantic models + - name: check_semantic_model_based_on_non_public_models # Sources - name: check_source_description_populated diff --git a/src/dbt_bouncer/checks/manifest/check_semantic_models.py b/src/dbt_bouncer/checks/manifest/check_semantic_models.py index 8ca1611a..4640684b 100644 --- a/src/dbt_bouncer/checks/manifest/check_semantic_models.py +++ b/src/dbt_bouncer/checks/manifest/check_semantic_models.py @@ -8,26 +8,23 @@ if TYPE_CHECKING: import warnings - from dbt_bouncer.parsers import DbtBouncerModel, DbtBouncerSemanticModel + from dbt_bouncer.parsers import ( + DbtBouncerModelBase, + DbtBouncerSemanticModelBase, + ) with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - -class CheckSemanticModelOnNonPublicModels(BaseCheck): - name: Literal["check_semantic_model_based_on_non_public_models"] +from pydantic import Field -def check_semantic_model_based_on_non_public_models( - models: List["DbtBouncerModel"], - semantic_model: "DbtBouncerSemanticModel", - **kwargs, -) -> None: +class CheckSemanticModelOnNonPublicModels(BaseCheck): """Semantic models should be based on public models only. - Parameters: - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. - semantic_model (DbtBouncerSemanticModel): The DbtBouncerSemanticModel object to check. + Receives: + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + semantic_model (DbtBouncerSemanticModelBase): The DbtBouncerSemanticModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the semantic model path (i.e the .yml file where the semantic model is configured). Semantic model paths that match the pattern will not be checked. @@ -41,14 +38,21 @@ def check_semantic_model_based_on_non_public_models( ``` """ - non_public_upstream_dependencies = [] - for model in semantic_model.depends_on.nodes: - if ( - model.split(".")[0] == "model" - and model.split(".")[1] == semantic_model.package_name - ): - model = next(m for m in models if m.unique_id == model) - if model.access.value != "public": - non_public_upstream_dependencies.append(model.name) - - assert not non_public_upstream_dependencies, f"Semantic model `{semantic_model.name}` is based on a model(s) that is not public: {non_public_upstream_dependencies}." + + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_semantic_model_based_on_non_public_models"] + semantic_model: "DbtBouncerSemanticModelBase" = Field(default=None) + + def execute(self) -> None: + """Execute the check.""" + non_public_upstream_dependencies = [] + for model in self.semantic_model.depends_on.nodes: + if ( + model.split(".")[0] == "model" + and model.split(".")[1] == self.semantic_model.package_name + ): + model = next(m for m in self.models if m.unique_id == model) + if model.access.value != "public": + non_public_upstream_dependencies.append(model.name) + + assert not non_public_upstream_dependencies, f"Semantic model `{self.semantic_model.name}` is based on a model(s) that is not public: {non_public_upstream_dependencies}." diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index 6157fd3b..cc9058fa 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -193,6 +193,8 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerModelBase, DbtBouncerRunResult, DbtBouncerRunResultBase, + DbtBouncerSemanticModel, + DbtBouncerSemanticModelBase, DbtBouncerSource, DbtBouncerSourceBase, DbtBouncerTest, diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index f361c809..56078bc2 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -103,11 +103,16 @@ class DbtBouncerRunResult(BaseModel): unique_id: str +DbtBouncerSemanticModelBase = Union[ + SemanticModel_v10, SemanticModel_v11, SemanticModels +] + + class DbtBouncerSemanticModel(BaseModel): """Model for all semantic model nodes in `manifest.json`.""" original_file_path: str - semantic_model: Union[SemanticModel_v10, SemanticModel_v11, SemanticModels] + semantic_model: DbtBouncerSemanticModelBase unique_id: str diff --git a/tests/unit/checks/manifest/test_semantic_models.py b/tests/unit/checks/manifest/test_semantic_models.py index 4310568b..28d9e554 100644 --- a/tests/unit/checks/manifest/test_semantic_models.py +++ b/tests/unit/checks/manifest/test_semantic_models.py @@ -10,8 +10,14 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import SemanticModels from dbt_bouncer.checks.manifest.check_semantic_models import ( - check_semantic_model_based_on_non_public_models, + CheckSemanticModelOnNonPublicModels, ) +from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerModelBase, + DbtBouncerSemanticModelBase, +) + +CheckSemanticModelOnNonPublicModels.model_rebuild() @pytest.mark.parametrize( @@ -103,7 +109,8 @@ def test_check_semantic_model_based_on_non_public_models( models, semantic_model, expectation ): with expectation: - check_semantic_model_based_on_non_public_models( + CheckSemanticModelOnNonPublicModels( models=models, + name="check_semantic_model_based_on_non_public_models", semantic_model=semantic_model, - ) + ).execute() From 50950c82f0a5f6f13879bf7da4d1d0429194667d Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 15:00:11 +0200 Subject: [PATCH 11/17] Moving Metadata to new method --- dbt-bouncer-example.yml | 6 +-- .../checks/manifest/check_metadata.py | 52 +++++++++---------- src/dbt_bouncer/runner.py | 2 + tests/unit/checks/manifest/test_metadata.py | 10 ++-- 4 files changed, 38 insertions(+), 32 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 222c3b1d..9368ca46 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -63,9 +63,9 @@ manifest_checks: - name: check_macro_name_matches_file_name - name: check_macro_property_file_location -# # Metadata -# - name: check_project_name -# project_name_pattern: ^dbt_bouncer_ + # Metadata + - name: check_project_name + project_name_pattern: ^dbt_bouncer_ # # Models # - name: check_model_access diff --git a/src/dbt_bouncer/checks/manifest/check_metadata.py b/src/dbt_bouncer/checks/manifest/check_metadata.py index fa00db17..4b5e3c18 100644 --- a/src/dbt_bouncer/checks/manifest/check_metadata.py +++ b/src/dbt_bouncer/checks/manifest/check_metadata.py @@ -10,31 +10,14 @@ class CheckProjectName(BaseModel): - model_config = ConfigDict(extra="forbid") - - index: Optional[int] = Field( - default=None, - description="Index to uniquely identify the check, calculated at runtime.", - ) - name: Literal["check_project_name"] - project_name_pattern: str - severity: Optional[Literal["error", "warn"]] = Field( - default="error", - description="Severity of the check, one of 'error' or 'warn'.", - ) - - -def check_project_name( - manifest_obj: "DbtBouncerManifest", - project_name_pattern: str, - **kwargs, -) -> None: """Enforce that the name of the dbt project matches a supplied regex. Generally used to enforce that project names conform to something like `company_`. Parameters: - manifest_obj (DbtBouncerManifest): The manifest object. project_name_pattern (str): Regex pattern to match the project name. + Receives: + manifest_obj (DbtBouncerManifest): The manifest object. + Other Parameters: severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. @@ -46,9 +29,26 @@ def check_project_name( ``` """ - assert ( - re.compile(project_name_pattern.strip()).match( - manifest_obj.manifest.metadata.project_name, - ) - is not None - ), f"Project name (`{manifest_obj.manifest.metadata.project_name}`) does not conform to the supplied regex `({project_name_pattern.strip()})`." + + model_config = ConfigDict(extra="forbid") + + index: Optional[int] = Field( + default=None, + description="Index to uniquely identify the check, calculated at runtime.", + ) + manifest_obj: "DbtBouncerManifest" = Field(default=None) + name: Literal["check_project_name"] + project_name_pattern: str + severity: Optional[Literal["error", "warn"]] = Field( + default="error", + description="Severity of the check, one of 'error' or 'warn'.", + ) + + def execute(self) -> None: + """Execute the check.""" + assert ( + re.compile(self.project_name_pattern.strip()).match( + self.manifest_obj.manifest.metadata.project_name, + ) + is not None + ), f"Project name (`{self.manifest_obj.manifest.metadata.project_name}`) does not conform to the supplied regex `({self.project_name_pattern.strip()})`." diff --git a/src/dbt_bouncer/runner.py b/src/dbt_bouncer/runner.py index a81c4072..cf075e22 100644 --- a/src/dbt_bouncer/runner.py +++ b/src/dbt_bouncer/runner.py @@ -132,6 +132,8 @@ def runner( ) else: check_run_id = f"{check.name}:{check.index}" + for x in parsed_data.keys() & check.__annotations__.keys(): + setattr(check, x, parsed_data[x]) checks_to_run.append( { **{ diff --git a/tests/unit/checks/manifest/test_metadata.py b/tests/unit/checks/manifest/test_metadata.py index 813432ab..93160cc3 100644 --- a/tests/unit/checks/manifest/test_metadata.py +++ b/tests/unit/checks/manifest/test_metadata.py @@ -2,7 +2,10 @@ import pytest -from dbt_bouncer.checks.manifest.check_metadata import check_project_name +from dbt_bouncer.checks.manifest.check_metadata import CheckProjectName +from dbt_bouncer.parsers import DbtBouncerManifest # noqa: F401 + +CheckProjectName.model_rebuild() @pytest.mark.parametrize( @@ -23,7 +26,8 @@ ) def test_check_project_name(manifest_obj, project_name_pattern, expectation): with expectation: - check_project_name( + CheckProjectName( manifest_obj=manifest_obj, + name="check_project_name", project_name_pattern=project_name_pattern, - ) + ).execute() From f2d4169ed7371e0318c267eba3b21ae4f1a06c19 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 15:13:53 +0200 Subject: [PATCH 12/17] Moving Catalog Sources to new method --- dbt-bouncer-example.yml | 6 +-- .../checks/catalog/check_catalog_sources.py | 47 ++++++++++++------- src/dbt_bouncer/config_file_validator.py | 9 ++++ .../checks/catalog/test_catalog_sources.py | 10 ++-- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 9368ca46..2f771db9 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -3,7 +3,7 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # # include: ^models/marts # Optional, here for demonstration purposes only # # severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. -# catalog_checks: +catalog_checks: # - name: check_column_description_populated # include: ^models/marts # - name: check_column_name_complies_to_column_type @@ -31,8 +31,8 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # - name: check_columns_are_all_documented # include: ^models/marts # - name: check_columns_are_documented_in_public_models -# - name: check_source_columns_are_all_documented -# exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only + - name: check_source_columns_are_all_documented + exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only manifest_checks: # Exposures diff --git a/src/dbt_bouncer/checks/catalog/check_catalog_sources.py b/src/dbt_bouncer/checks/catalog/check_catalog_sources.py index 162dfe67..331c699c 100644 --- a/src/dbt_bouncer/checks/catalog/check_catalog_sources.py +++ b/src/dbt_bouncer/checks/catalog/check_catalog_sources.py @@ -1,25 +1,27 @@ from typing import TYPE_CHECKING, List, Literal +from pydantic import Field + from dbt_bouncer.check_base import BaseCheck if TYPE_CHECKING: - from dbt_bouncer.parsers import DbtBouncerCatalogNode, DbtBouncerSource + import warnings + from dbt_bouncer.parsers import ( + DbtBouncerSourceBase, + ) -class CheckSourceColumnsAreAllDocumented(BaseCheck): - name: Literal["check_source_columns_are_all_documented"] + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogTable -def check_source_columns_are_all_documented( - catalog_source: "DbtBouncerCatalogNode", - sources: List["DbtBouncerSource"], - **kwargs, -) -> None: +class CheckSourceColumnsAreAllDocumented(BaseCheck): """All columns in a source should be included in the source's properties file, i.e. `.yml` file. - Parameters: - catalog_source (DbtBouncerCatalogNode): The DbtBouncerCatalogNode object to check. - sources (List[DbtBouncerSource]): List of DbtBouncerSource objects parsed from `catalog.json`. + Receives: + catalog_source (CatalogTable): The CatalogTable object to check. + sources (List[DbtBouncerSourceBase]): List of DbtBouncerSourceBase objects parsed from `catalog.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the source path (i.e the .yml file where the source is configured). Source paths that match the pattern will not be checked. @@ -33,10 +35,19 @@ def check_source_columns_are_all_documented( ``` """ - source = next(s for s in sources if s.unique_id == catalog_source.unique_id) - undocumented_columns = [ - v.name - for _, v in catalog_source.columns.items() - if v.name not in source.columns - ] - assert not undocumented_columns, f"`{catalog_source.unique_id}` has columns that are not included in the sources properties file: {undocumented_columns}" + + catalog_source: "CatalogTable" = Field(default=None) + name: Literal["check_source_columns_are_all_documented"] + sources: List["DbtBouncerSourceBase"] = Field(default=[]) + + def execute(self) -> None: + """Execute the check.""" + source = next( + s for s in self.sources if s.unique_id == self.catalog_source.unique_id + ) + undocumented_columns = [ + v.name + for _, v in self.catalog_source.columns.items() + if v.name not in source.columns + ] + assert not undocumented_columns, f"`{self.catalog_source.unique_id}` has columns that are not included in the sources properties file: {undocumented_columns}" diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index cc9058fa..5130fb75 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -179,6 +179,8 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": logging.info("Validating conf...") # Rebuild the model to ensure all fields are present + import warnings + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( Exposures, # noqa: F401 Macros, # noqa: F401 @@ -188,6 +190,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerCatalogNode, DbtBouncerManifest, DbtBouncerModel, DbtBouncerModelBase, @@ -200,6 +203,12 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerTest, ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + from dbt_artifacts_parser.parsers.catalog.catalog_v1 import ( + CatalogTable, # noqa: F401 + ) + DbtBouncerConf.model_rebuild() return DbtBouncerConf(**config_file_contents) diff --git a/tests/unit/checks/catalog/test_catalog_sources.py b/tests/unit/checks/catalog/test_catalog_sources.py index f759c6f5..6899bcf3 100644 --- a/tests/unit/checks/catalog/test_catalog_sources.py +++ b/tests/unit/checks/catalog/test_catalog_sources.py @@ -9,8 +9,11 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Sources from dbt_bouncer.checks.catalog.check_catalog_sources import ( - check_source_columns_are_all_documented, + CheckSourceColumnsAreAllDocumented, ) +from dbt_bouncer.parsers import DbtBouncerSourceBase # noqa: F401 + +CheckSourceColumnsAreAllDocumented.model_rebuild() @pytest.mark.parametrize( @@ -121,7 +124,8 @@ ) def test_check_source_columns_are_all_documented(catalog_source, sources, expectation): with expectation: - check_source_columns_are_all_documented( + CheckSourceColumnsAreAllDocumented( catalog_source=catalog_source, + name="check_source_columns_are_all_documented", sources=sources, - ) + ).execute() From fa51cb4c75608d0ec729a7df4c88925ccddcb256 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 15:32:52 +0200 Subject: [PATCH 13/17] Moving Columns to new method --- dbt-bouncer-example.yml | 54 ++--- .../checks/catalog/check_columns.py | 216 +++++++++--------- src/dbt_bouncer/config_file_validator.py | 1 + src/dbt_bouncer/parsers.py | 5 +- tests/unit/checks/catalog/test_columns.py | 48 ++-- 5 files changed, 175 insertions(+), 149 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 2f771db9..597f1605 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -4,33 +4,33 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artif # # severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. catalog_checks: -# - name: check_column_description_populated -# include: ^models/marts -# - name: check_column_name_complies_to_column_type -# column_name_pattern: ^is_.* -# exclude: ^models/staging -# types: -# - BOOLEAN -# - name: check_column_name_complies_to_column_type -# column_name_pattern: .*_date$ -# include: ^models/staging # Not a good idea, here for demonstration purposes only -# types: -# - DATE -# - name: check_column_name_complies_to_column_type -# column_name_pattern: ^[a-z_]*$ -# types: -# - BIGINT -# - BOOLEAN -# - DATE -# - DOUBLE -# - INTEGER -# - VARCHAR -# - name: check_column_has_specified_test -# column_name_pattern: ^is_.* -# test_name: not_null -# - name: check_columns_are_all_documented -# include: ^models/marts -# - name: check_columns_are_documented_in_public_models + - name: check_column_description_populated + include: ^models/marts + - name: check_column_name_complies_to_column_type + column_name_pattern: ^is_.* + exclude: ^models/staging + types: + - BOOLEAN + - name: check_column_name_complies_to_column_type + column_name_pattern: .*_date$ + include: ^models/staging # Not a good idea, here for demonstration purposes only + types: + - DATE + - name: check_column_name_complies_to_column_type + column_name_pattern: ^[a-z_]*$ + types: + - BIGINT + - BOOLEAN + - DATE + - DOUBLE + - INTEGER + - VARCHAR + - name: check_column_has_specified_test + column_name_pattern: ^is_.* + test_name: not_null + - name: check_columns_are_all_documented + include: ^models/marts + - name: check_columns_are_documented_in_public_models - name: check_source_columns_are_all_documented exclude: ^models/staging/crm # Not a good idea, here for demonstration purposes only diff --git a/src/dbt_bouncer/checks/catalog/check_columns.py b/src/dbt_bouncer/checks/catalog/check_columns.py index 4278b9f9..77b25f45 100644 --- a/src/dbt_bouncer/checks/catalog/check_columns.py +++ b/src/dbt_bouncer/checks/catalog/check_columns.py @@ -9,25 +9,19 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogTable - from dbt_bouncer.parsers import DbtBouncerModel, DbtBouncerTest + from dbt_bouncer.parsers import DbtBouncerModelBase, DbtBouncerTestBase + +from pydantic import Field from dbt_bouncer.check_base import BaseCheck class CheckColumnDescriptionPopulated(BaseCheck): - name: Literal["check_column_description_populated"] - - -def check_column_description_populated( - catalog_node: "CatalogTable", - models: List["DbtBouncerModel"], - **kwargs, -) -> None: """Columns must have a populated description. - Parameters: + Receives: catalog_node (CatalogTable): The CatalogTable object to check. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -42,38 +36,38 @@ def check_column_description_populated( ``` """ - if catalog_node.unique_id.split(".")[0] == "model": - model = next(m for m in models if m.unique_id == catalog_node.unique_id) - non_complying_columns = [] - for _, v in catalog_node.columns.items(): - if ( - model.columns.get(v.name) is None - or len(model.columns[v.name].description.strip()) <= 4 - ): - non_complying_columns.append(v.name) - assert not non_complying_columns, f"`{catalog_node.unique_id.split('.')[-1]}` has columns that do not have a populated description: {non_complying_columns}" + catalog_node: "CatalogTable" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_column_description_populated"] + def execute(self) -> None: + """Execute the check.""" + if self.catalog_node.unique_id.split(".")[0] == "model": + model = next( + m for m in self.models if m.unique_id == self.catalog_node.unique_id + ) + non_complying_columns = [] + for _, v in self.catalog_node.columns.items(): + if ( + model.columns.get(v.name) is None + or len(model.columns[v.name].description.strip()) <= 4 + ): + non_complying_columns.append(v.name) -class CheckColumnNameCompliesToColumnType(BaseCheck): - column_name_pattern: str - name: Literal["check_column_name_complies_to_column_type"] - types: List[str] + assert not non_complying_columns, f"`{self.catalog_node.unique_id.split('.')[-1]}` has columns that do not have a populated description: {non_complying_columns}" -def check_column_name_complies_to_column_type( - catalog_node: "CatalogTable", - column_name_pattern: str, - types: List[str], - **kwargs, -) -> None: +class CheckColumnNameCompliesToColumnType(BaseCheck): """Columns with specified data types must comply to the specified regexp naming pattern. Parameters: - catalog_node (CatalogTable): The CatalogTable object to check. column_name_pattern: (str): Regex pattern to match the model name. types (List[str]): List of data types to check. + Receives: + catalog_node (CatalogTable): The CatalogTable object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. @@ -111,28 +105,28 @@ def check_column_name_complies_to_column_type( ``` """ - non_complying_columns = [ - v.name - for _, v in catalog_node.columns.items() - if v.type in types - and re.compile(column_name_pattern.strip()).match(v.name) is None - ] - assert not non_complying_columns, f"`{catalog_node.unique_id.split('.')[-1]}` has columns that don't comply with the specified regexp pattern (`{column_name_pattern}`): {non_complying_columns}" + catalog_node: "CatalogTable" = Field(default=None) + column_name_pattern: str + name: Literal["check_column_name_complies_to_column_type"] + types: List[str] + def execute(self) -> None: + """Execute the check.""" + non_complying_columns = [ + v.name + for _, v in self.catalog_node.columns.items() + if v.type in self.types + and re.compile(self.column_name_pattern.strip()).match(v.name) is None + ] -class CheckColumnsAreAllDocumented(BaseCheck): - name: Literal["check_columns_are_all_documented"] + assert not non_complying_columns, f"`{self.catalog_node.unique_id.split('.')[-1]}` has columns that don't comply with the specified regexp pattern (`{self.column_name_pattern}`): {non_complying_columns}" -def check_columns_are_all_documented( - catalog_node: "CatalogTable", - models: List["DbtBouncerModel"], - **kwargs, -) -> None: +class CheckColumnsAreAllDocumented(BaseCheck): """All columns in a model should be included in the model's properties file, i.e. `.yml` file. - Parameters: + Receives: catalog_node (CatalogTable): The CatalogTable object to check. models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. @@ -148,28 +142,29 @@ def check_columns_are_all_documented( ``` """ - if catalog_node.unique_id.split(".")[0] == "model": - model = next(m for m in models if m.unique_id == catalog_node.unique_id) - undocumented_columns = [ - v.name - for _, v in catalog_node.columns.items() - if v.name not in model.columns - ] - assert not undocumented_columns, f"`{catalog_node.unique_id.split('.')[-1]}` has columns that are not included in the models properties file: {undocumented_columns}" + catalog_node: "CatalogTable" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_columns_are_all_documented"] -class CheckColumnsAreDocumentedInPublicModels(BaseCheck): - name: Literal["check_columns_are_documented_in_public_models"] + def execute(self) -> None: + """Execute the check.""" + if self.catalog_node.unique_id.split(".")[0] == "model": + model = next( + m for m in self.models if m.unique_id == self.catalog_node.unique_id + ) + undocumented_columns = [ + v.name + for _, v in self.catalog_node.columns.items() + if v.name not in model.columns + ] + assert not undocumented_columns, f"`{self.catalog_node.unique_id.split('.')[-1]}` has columns that are not included in the models properties file: {undocumented_columns}" -def check_columns_are_documented_in_public_models( - catalog_node: "CatalogTable", - models: List["DbtBouncerModel"], - **kwargs, -) -> None: +class CheckColumnsAreDocumentedInPublicModels(BaseCheck): """Columns should have a populated description in public models. - Parameters: + Receives: catalog_node (CatalogTable): The CatalogTable object to check. models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. @@ -185,38 +180,40 @@ def check_columns_are_documented_in_public_models( ``` """ - if catalog_node.unique_id.split(".")[0] == "model": - model = next(m for m in models if m.unique_id == catalog_node.unique_id) - non_complying_columns = [] - for _, v in catalog_node.columns.items(): - if model.access.value == "public": - column_config = model.columns.get(v.name) - if column_config is None or len(column_config.description.strip()) < 4: - non_complying_columns.append(v.name) - - assert not non_complying_columns, f"`{catalog_node.unique_id.split('.')[-1]}` is a public model but has columns that don't have a populated description: {non_complying_columns}" + catalog_node: "CatalogTable" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_columns_are_documented_in_public_models"] -class CheckColumnHasSpecifiedTest(BaseCheck): - column_name_pattern: str - name: Literal["check_column_has_specified_test"] - test_name: str + def execute(self) -> None: + """Execute the check.""" + if self.catalog_node.unique_id.split(".")[0] == "model": + model = next( + m for m in self.models if m.unique_id == self.catalog_node.unique_id + ) + non_complying_columns = [] + for _, v in self.catalog_node.columns.items(): + if model.access.value == "public": + column_config = model.columns.get(v.name) + if ( + column_config is None + or len(column_config.description.strip()) < 4 + ): + non_complying_columns.append(v.name) + + assert not non_complying_columns, f"`{self.catalog_node.unique_id.split('.')[-1]}` is a public model but has columns that don't have a populated description: {non_complying_columns}" -def check_column_has_specified_test( - catalog_node: "CatalogTable", - column_name_pattern: str, - test_name: str, - tests: List["DbtBouncerTest"], - **kwargs, -) -> None: +class CheckColumnHasSpecifiedTest(BaseCheck): """Columns that match the specified regexp pattern must have a specified test. Parameters: - catalog_node (CatalogTable): The CatalogTable object to check. column_name_pattern (str): Regex pattern to match the column name. test_name (str): Name of the test to check for. - tests (List[DbtBouncerTest]): List of DbtBouncerTest objects parsed from `manifest.json`. + + Receives: + catalog_node (CatalogTable): The CatalogTable object to check. + tests (List[DbtBouncerTestBase]): List of DbtBouncerTestBase objects parsed from `manifest.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -232,22 +229,31 @@ def check_column_has_specified_test( ``` """ - columns_to_check = [ - v.name - for _, v in catalog_node.columns.items() - if re.compile(column_name_pattern.strip()).match(v.name) is not None - ] - relevant_tests = [ - t - for t in tests - if t.test_metadata.name == test_name - and t.attached_node == catalog_node.unique_id - ] - non_complying_columns = [ - c - for c in columns_to_check - if f"{catalog_node.unique_id}.{c}" - not in [f"{t.attached_node}.{t.column_name}" for t in relevant_tests] - ] - - assert not non_complying_columns, f"`{catalog_node.unique_id.split('.')[-1]}` has columns that should have a `{test_name}` test: {non_complying_columns}" + + catalog_node: "CatalogTable" = Field(default=None) + column_name_pattern: str + name: Literal["check_column_has_specified_test"] + test_name: str + tests: List["DbtBouncerTestBase"] = Field(default=[]) + + def execute(self) -> None: + """Execute the check.""" + columns_to_check = [ + v.name + for _, v in self.catalog_node.columns.items() + if re.compile(self.column_name_pattern.strip()).match(v.name) is not None + ] + relevant_tests = [ + t + for t in self.tests + if t.test_metadata.name == self.test_name + and t.attached_node == self.catalog_node.unique_id + ] + non_complying_columns = [ + c + for c in columns_to_check + if f"{self.catalog_node.unique_id}.{c}" + not in [f"{t.attached_node}.{t.column_name}" for t in relevant_tests] + ] + + assert not non_complying_columns, f"`{self.catalog_node.unique_id.split('.')[-1]}` has columns that should have a `{self.test_name}` test: {non_complying_columns}" diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index 5130fb75..c48d5fba 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -201,6 +201,7 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerSource, DbtBouncerSourceBase, DbtBouncerTest, + DbtBouncerTestBase, ) with warnings.catch_warnings(): diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index 56078bc2..9d360889 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -127,11 +127,14 @@ class DbtBouncerSource(BaseModel): unique_id: str +DbtBouncerTestBase = Union[GenericTestNode_v10, GenericTestNode_v11, Nodes6] + + class DbtBouncerTest(BaseModel): """Model for all test nodes in `manifest.json`.""" original_file_path: str - test: Union[GenericTestNode_v10, GenericTestNode_v11, Nodes6] + test: DbtBouncerTestBase unique_id: str diff --git a/tests/unit/checks/catalog/test_columns.py b/tests/unit/checks/catalog/test_columns.py index 2ee5dfb8..db71df47 100644 --- a/tests/unit/checks/catalog/test_columns.py +++ b/tests/unit/checks/catalog/test_columns.py @@ -7,15 +7,26 @@ warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogTable from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4, Nodes6 - from dbt_bouncer.checks.catalog.check_columns import ( - check_column_description_populated, - check_column_has_specified_test, - check_column_name_complies_to_column_type, - check_columns_are_all_documented, - check_columns_are_documented_in_public_models, + CheckColumnDescriptionPopulated, + CheckColumnHasSpecifiedTest, + CheckColumnNameCompliesToColumnType, + CheckColumnsAreAllDocumented, + CheckColumnsAreDocumentedInPublicModels, +) +from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerModelBase, + DbtBouncerTest, + DbtBouncerTestBase, ) +CheckColumnDescriptionPopulated.model_rebuild() +CheckColumnNameCompliesToColumnType.model_rebuild() +CheckColumnNameCompliesToColumnType.model_rebuild() +CheckColumnsAreAllDocumented.model_rebuild() +CheckColumnsAreDocumentedInPublicModels.model_rebuild() +CheckColumnHasSpecifiedTest.model_rebuild() + @pytest.mark.parametrize( ("catalog_node", "models", "expectation"), @@ -135,10 +146,11 @@ ) def test_check_column_description_populated(catalog_node, models, expectation): with expectation: - check_column_description_populated( + CheckColumnDescriptionPopulated( catalog_node=catalog_node, models=models, - ) + name="check_column_description_populated", + ).execute() @pytest.mark.parametrize( @@ -252,12 +264,13 @@ def test_check_column_has_specified_test( expectation, ): with expectation: - check_column_has_specified_test( + CheckColumnHasSpecifiedTest( catalog_node=catalog_node, column_name_pattern=column_name_pattern, + name="check_column_has_specified_test", test_name=test_name, tests=tests, - ) + ).execute() @pytest.mark.parametrize( @@ -370,10 +383,11 @@ def test_check_column_has_specified_test( ) def test_check_columns_are_all_documented(catalog_node, models, expectation): with expectation: - check_columns_are_all_documented( + CheckColumnsAreAllDocumented( catalog_node=catalog_node, models=models, - ) + name="check_columns_are_all_documented", + ).execute() @pytest.mark.parametrize( @@ -490,10 +504,11 @@ def test_check_columns_are_documented_in_public_models( expectation, ): with expectation: - check_columns_are_documented_in_public_models( + CheckColumnsAreDocumentedInPublicModels( catalog_node=catalog_node, models=models, - ) + name="check_columns_are_documented_in_public_models", + ).execute() @pytest.mark.parametrize( @@ -587,8 +602,9 @@ def test_check_column_name_complies_to_column_type( expectation, ): with expectation: - check_column_name_complies_to_column_type( + CheckColumnNameCompliesToColumnType( catalog_node=catalog_node, column_name_pattern=column_name_pattern, + name="check_column_name_complies_to_column_type", types=types, - ) + ).execute() From c03838539f9577ebfd9f29b38e9d87a6f057af01 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 16:27:10 +0200 Subject: [PATCH 14/17] Moving Models to new method --- dbt-bouncer-example.yml | 114 +-- .../checks/manifest/check_models.py | 892 +++++++++--------- tests/unit/checks/manifest/test_models.py | 188 ++-- 3 files changed, 627 insertions(+), 567 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 597f1605..33faa91f 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -67,64 +67,64 @@ manifest_checks: - name: check_project_name project_name_pattern: ^dbt_bouncer_ -# # Models -# - name: check_model_access -# include: ^models/intermediate -# access: protected -# severity: warn -# - name: check_model_access -# include: ^models/marts -# access: public -# - name: check_model_access -# include: ^models/staging -# access: protected -# - name: check_model_code_does_not_contain_regexp_pattern -# regexp_pattern: .*[i][f][n][u][l][l].* -# - name: check_model_contract_enforced_for_public_model -# - name: check_model_depends_on_multiple_sources -# - name: check_model_description_populated -# - name: check_model_directories -# include: ^models -# permitted_sub_directories: -# - intermediate -# - marts -# - staging -# - utilities -# - name: check_model_directories -# include: ^models/staging -# permitted_sub_directories: -# - crm -# - payments -# - name: check_model_documentation_coverage -# - name: check_model_documented_in_same_directory -# - name: check_model_has_contracts_enforced -# include: ^models/marts -# - name: check_model_has_meta_keys -# keys: -# - maturity -# - name: check_model_has_no_upstream_dependencies -# include: ^((?!int_*).)*$ -# exclude: ^models/utilities/time_spines -# - name: check_model_has_tags -# include: ^models/staging/crm -# tags: -# - crm + # Models + - name: check_model_access + include: ^models/intermediate + access: protected + severity: warn + - name: check_model_access + include: ^models/marts + access: public + - name: check_model_access + include: ^models/staging + access: protected + - name: check_model_code_does_not_contain_regexp_pattern + regexp_pattern: .*[i][f][n][u][l][l].* + - name: check_model_contract_enforced_for_public_model + - name: check_model_depends_on_multiple_sources + - name: check_model_description_populated + - name: check_model_directories + include: ^models + permitted_sub_directories: + - intermediate + - marts + - staging + - utilities + - name: check_model_directories + include: ^models/staging + permitted_sub_directories: + - crm + - payments + - name: check_model_documentation_coverage + - name: check_model_documented_in_same_directory + - name: check_model_has_contracts_enforced + include: ^models/marts + - name: check_model_has_meta_keys + keys: + - maturity + - name: check_model_has_no_upstream_dependencies + include: ^((?!int_*).)*$ + exclude: ^models/utilities/time_spines + - name: check_model_has_tags + include: ^models/staging/crm + tags: + - crm - name: check_model_has_unique_test -# - name: check_model_max_chained_views -# - name: check_model_max_fanout -# max_downstream_models: 2 -# - name: check_model_max_number_of_lines -# - name: check_model_max_upstream_dependencies -# - name: check_model_has_unit_tests -# include: ^models/marts -# - name: check_model_names -# include: ^models/intermediate -# model_name_pattern: ^int_ -# - name: check_model_names -# include: ^models/staging -# model_name_pattern: ^stg_ -# - name: check_model_test_coverage -# - name: check_model_property_file_location + - name: check_model_max_chained_views + - name: check_model_max_fanout + max_downstream_models: 2 + - name: check_model_max_number_of_lines + - name: check_model_max_upstream_dependencies + - name: check_model_has_unit_tests + include: ^models/marts + - name: check_model_names + include: ^models/intermediate + model_name_pattern: ^int_ + - name: check_model_names + include: ^models/staging + model_name_pattern: ^stg_ + - name: check_model_test_coverage + - name: check_model_property_file_location # Semantic models - name: check_semantic_model_based_on_non_public_models diff --git a/src/dbt_bouncer/checks/manifest/check_models.py b/src/dbt_bouncer/checks/manifest/check_models.py index 17381553..c5dd372a 100644 --- a/src/dbt_bouncer/checks/manifest/check_models.py +++ b/src/dbt_bouncer/checks/manifest/check_models.py @@ -20,24 +20,26 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( UnitTests, ) - from dbt_bouncer.parsers import DbtBouncerManifest, DbtBouncerModel, DbtBouncerTest + from dbt_bouncer.parsers import ( + DbtBouncerManifest, + DbtBouncerModelBase, + DbtBouncerTestBase, + ) +if TYPE_CHECKING: + from dbt_bouncer.parsers import ( + DbtBouncerManifest, + DbtBouncerModelBase, + ) class CheckModelAccess(BaseCheck): - access: Literal["private", "protected", "public"] - name: Literal["check_model_access"] - - -def check_model_access( - access: str, - model: "DbtBouncerModel", - **kwargs, -) -> None: """Models must have the specified access attribute. Requires dbt 1.7+. Parameters: access (Literal["private", "protected", "public"]): The access level to check for. - model (DbtBouncerModel): The DbtBouncerModel object to check. + + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -60,23 +62,23 @@ def check_model_access( ``` """ - assert ( - model.access.value == access - ), f"`{model.name}` has `{model.access.value}` access, it should have access `{access}`." + access: Literal["private", "protected", "public"] + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_access"] -class CheckModelContractsEnforcedForPublicModel(BaseCheck): - name: Literal["check_model_contract_enforced_for_public_model"] + def execute(self) -> None: + """Execute the check.""" + assert ( + self.model.access.value == self.access + ), f"`{self.model.name}` has `{self.model.access.value}` access, it should have access `{self.access}`." -def check_model_contract_enforced_for_public_model( - model: "DbtBouncerModel", - **kwargs, -) -> None: +class CheckModelContractsEnforcedForPublicModel(BaseCheck): """Public models must have contracts enforced. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -90,21 +92,23 @@ def check_model_contract_enforced_for_public_model( ``` """ - if model.access.value == "public": - assert ( - model.contract.enforced is True - ), f"`{model.name}` is a public model but does not have contracts enforced." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_contract_enforced_for_public_model"] -class CheckModelDescriptionPopulated(BaseCheck): - name: Literal["check_model_description_populated"] + def execute(self) -> None: + """Execute the check.""" + if self.model.access.value == "public": + assert ( + self.model.contract.enforced is True + ), f"`{self.model.name}` is a public model but does not have contracts enforced." -def check_model_description_populated(model: "DbtBouncerModel", **kwargs) -> None: +class CheckModelDescriptionPopulated(BaseCheck): """Models must have a populated description. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -118,40 +122,25 @@ def check_model_description_populated(model: "DbtBouncerModel", **kwargs) -> Non ``` """ - assert ( - len(model.description.strip()) > 4 - ), f"`{model.name}` does not have a populated description." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_description_populated"] -class CheckModelsDocumentationCoverage(BaseModel): - model_config = ConfigDict(extra="forbid") - - index: Optional[int] = Field( - default=None, - description="Index to uniquely identify the check, calculated at runtime.", - ) - name: Literal["check_model_documentation_coverage"] - min_model_documentation_coverage_pct: int = Field( - default=100, - ge=0, - le=100, - ) - severity: Optional[Literal["error", "warn"]] = Field( - default="error", - description="Severity of the check, one of 'error' or 'warn'.", - ) + def execute(self) -> None: + """Execute the check.""" + assert ( + len(self.model.description.strip()) > 4 + ), f"`{self.model.name}` does not have a populated description." -def check_model_documentation_coverage( - models: List["DbtBouncerModel"], - min_model_documentation_coverage_pct: float = 100, - **kwargs, -) -> None: +class CheckModelsDocumentationCoverage(BaseModel): """Set the minimum percentage of models that have a populated description. Parameters: min_model_documentation_coverage_pct (float): The minimum percentage of models that must have a populated description. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. + + Receives: + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. @@ -164,31 +153,48 @@ def check_model_documentation_coverage( ``` """ - num_models = len(models) - models_with_description = [] - for model in models: - if len(model.description.strip()) > 4: - models_with_description.append(model.unique_id) - num_models_with_descriptions = len(models_with_description) - model_description_coverage_pct = (num_models_with_descriptions / num_models) * 100 + model_config = ConfigDict(extra="forbid") - assert ( - model_description_coverage_pct >= min_model_documentation_coverage_pct - ), f"Only {model_description_coverage_pct}% of models have a populated description, this is less than the permitted minimum of {min_model_documentation_coverage_pct}%." + index: Optional[int] = Field( + default=None, + description="Index to uniquely identify the check, calculated at runtime.", + ) + min_model_documentation_coverage_pct: int = Field( + default=100, + ge=0, + le=100, + ) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_model_documentation_coverage"] + severity: Optional[Literal["error", "warn"]] = Field( + default="error", + description="Severity of the check, one of 'error' or 'warn'.", + ) + def execute(self) -> None: + """Execute the check.""" + num_models = len(self.models) + models_with_description = [] + for model in self.models: + if len(model.description.strip()) > 4: + models_with_description.append(model.unique_id) -class CheckModelDocumentedInSameDirectory(BaseCheck): - name: Literal["check_model_documented_in_same_directory"] + num_models_with_descriptions = len(models_with_description) + model_description_coverage_pct = ( + num_models_with_descriptions / num_models + ) * 100 + + assert ( + model_description_coverage_pct >= self.min_model_documentation_coverage_pct + ), f"Only {model_description_coverage_pct}% of models have a populated description, this is less than the permitted minimum of {self.min_model_documentation_coverage_pct}%." -def check_model_documented_in_same_directory( - model: "DbtBouncerModel", **kwargs -) -> None: +class CheckModelDocumentedInSameDirectory(BaseCheck): """Models must be documented in the same directory where they are defined (i.e. `.yml` and `.sql` files are in the same directory). - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -202,30 +208,31 @@ def check_model_documented_in_same_directory( ``` """ - model_doc_dir = model.patch_path[model.patch_path.find("models") :].split("/")[:-1] - model_sql_dir = model.original_file_path.split("/")[:-1] - assert ( - model_doc_dir == model_sql_dir - ), f"`{model.name}` is documented in a different directory to the `.sql` file: `{'/'.join(model_doc_dir)}` vs `{'/'.join(model_sql_dir)}`." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_documented_in_same_directory"] + def execute(self) -> None: + """Execute the check.""" + model_doc_dir = self.model.patch_path[ + self.model.patch_path.find("models") : + ].split("/")[:-1] + model_sql_dir = self.model.original_file_path.split("/")[:-1] -class CheckModelCodeDoesNotContainRegexpPattern(BaseCheck): - name: Literal["check_model_code_does_not_contain_regexp_pattern"] - regexp_pattern: str + assert ( + model_doc_dir == model_sql_dir + ), f"`{self.model.name}` is documented in a different directory to the `.sql` file: `{'/'.join(model_doc_dir)}` vs `{'/'.join(model_sql_dir)}`." -def check_model_code_does_not_contain_regexp_pattern( - model: "DbtBouncerModel", - regexp_pattern: str, - **kwargs, -) -> None: +class CheckModelCodeDoesNotContainRegexpPattern(BaseCheck): """The raw code for a model must not match the specified regexp pattern. Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. regexp_pattern (str): The regexp pattern that should not be matched by the model code. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. @@ -240,21 +247,26 @@ def check_model_code_does_not_contain_regexp_pattern( ``` """ - assert ( - re.compile(regexp_pattern.strip(), flags=re.DOTALL).match(model.raw_code) - is None - ), f"`{model.name}` contains a banned string: `{regexp_pattern.strip()}`." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_code_does_not_contain_regexp_pattern"] + regexp_pattern: str -class CheckModelDependsOnMultipleSources(BaseCheck): - name: Literal["check_model_depends_on_multiple_sources"] + def execute(self) -> None: + """Execute the check.""" + assert ( + re.compile(self.regexp_pattern.strip(), flags=re.DOTALL).match( + self.model.raw_code + ) + is None + ), f"`{self.model.name}` contains a banned string: `{self.regexp_pattern.strip()}`." -def check_model_depends_on_multiple_sources(model: "DbtBouncerModel", **kwargs) -> None: +class CheckModelDependsOnMultipleSources(BaseCheck): """Models cannot reference more than one source. Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -268,30 +280,30 @@ def check_model_depends_on_multiple_sources(model: "DbtBouncerModel", **kwargs) ``` """ - num_reffed_sources = sum( - x.split(".")[0] == "source" for x in model.depends_on.nodes - ) - assert num_reffed_sources <= 1, f"`{model.name}` references more than one source." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_depends_on_multiple_sources"] -class CheckModelDirectories(BaseCheck): - name: Literal["check_model_directories"] - permitted_sub_directories: List[str] + def execute(self) -> None: + """Execute the check.""" + num_reffed_sources = sum( + x.split(".")[0] == "source" for x in self.model.depends_on.nodes + ) + assert ( + num_reffed_sources <= 1 + ), f"`{self.model.name}` references more than one source." -def check_model_directories( - include: str, - model: "DbtBouncerModel", - permitted_sub_directories: List[str], - **kwargs, -) -> None: +class CheckModelDirectories(BaseCheck): """Only specified sub-directories are permitted. Parameters: include (str): Regex pattern to the directory to check. - model (DbtBouncerModel): The DbtBouncerModel object to check. permitted_sub_directories (List[str]): List of permitted sub-directories. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + Example(s): ```yaml manifest_checks: @@ -312,23 +324,29 @@ def check_model_directories( ``` """ - matched_path = re.compile(include.strip()).match(model.original_file_path) - path_after_match = model.original_file_path[matched_path.end() + 1 :] # type: ignore[union-attr] - assert ( - path_after_match.split("/")[0] in permitted_sub_directories - ), f"`{model.name}` is located in `{model.original_file_path.split('/')[1]}`, this is not a valid sub-directory. {path_after_match.split('/')[0]}" + include: str + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_directories"] + permitted_sub_directories: List[str] + def execute(self) -> None: + """Execute the check.""" + matched_path = re.compile(self.include.strip()).match( + self.model.original_file_path + ) + path_after_match = self.model.original_file_path[matched_path.end() + 1 :] # type: ignore[union-attr] -class CheckModelHasContractsEnforced(BaseCheck): - name: Literal["check_model_has_contracts_enforced"] + assert ( + path_after_match.split("/")[0] in self.permitted_sub_directories + ), f"`{self.model.name}` is located in `{self.model.original_file_path.split('/')[1]}`, this is not a valid sub-directory. {path_after_match.split('/')[0]}" -def check_model_has_contracts_enforced(model: "DbtBouncerModel", **kwargs) -> None: +class CheckModelHasContractsEnforced(BaseCheck): """Model must have contracts enforced. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -343,26 +361,23 @@ def check_model_has_contracts_enforced(model: "DbtBouncerModel", **kwargs) -> No ``` """ - assert ( - model.contract.enforced is True - ), f"`{model.name}` does not have contracts enforced." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_has_contracts_enforced"] -class CheckModelHasMetaKeys(BaseCheck): - keys: NestedDict - name: Literal["check_model_has_meta_keys"] + def execute(self) -> None: + """Execute the check.""" + assert ( + self.model.contract.enforced is True + ), f"`{self.model.name}` does not have contracts enforced." -def check_model_has_meta_keys( - keys: NestedDict, - model: "DbtBouncerModel", - **kwargs, -) -> None: +class CheckModelHasMetaKeys(BaseCheck): """The `meta` config for models must have the specified keys. Parameters: keys (NestedDict): A list (that may contain sub-lists) of required keys. - model (DbtBouncerModel): The DbtBouncerModel object to check. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -379,26 +394,27 @@ def check_model_has_meta_keys( ``` """ - missing_keys = find_missing_meta_keys( - meta_config=model.meta, - required_keys=keys, - ) - assert ( - missing_keys == [] - ), f"`{model.name}` is missing the following keys from the `meta` config: {[x.replace('>>', '') for x in missing_keys]}" + keys: NestedDict + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_has_meta_keys"] -class CheckModelHasNoUpstreamDependencies(BaseCheck): - name: Literal["check_model_has_no_upstream_dependencies"] + def execute(self) -> None: + """Execute the check.""" + missing_keys = find_missing_meta_keys( + meta_config=self.model.meta, + required_keys=self.keys.model_dump(), + ) + assert ( + missing_keys == [] + ), f"`{self.model.name}` is missing the following keys from the `meta` config: {[x.replace('>>', '') for x in missing_keys]}" -def check_model_has_no_upstream_dependencies( - model: "DbtBouncerModel", **kwargs -) -> None: +class CheckModelHasNoUpstreamDependencies(BaseCheck): """Identify if models have no upstream dependencies as this likely indicates hard-coded tables references. - Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -412,21 +428,22 @@ def check_model_has_no_upstream_dependencies( ``` """ - assert ( - len(model.depends_on.nodes) > 0 - ), f"`{model.name}` has no upstream dependencies, this likely indicates hard-coded tables references." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_has_no_upstream_dependencies"] -class CheckModelHasTags(BaseCheck): - name: Literal["check_model_has_tags"] - tags: List[str] + def execute(self) -> None: + """Execute the check.""" + assert ( + len(self.model.depends_on.nodes) > 0 + ), f"`{self.model.name}` has no upstream dependencies, this likely indicates hard-coded tables references." -def check_model_has_tags(model: "DbtBouncerModel", tags: List[str], **kwargs) -> None: +class CheckModelHasTags(BaseCheck): """Models must have the specified tags. Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. tags (List[str]): List of tags to check for. Other Parameters: @@ -444,11 +461,50 @@ def check_model_has_tags(model: "DbtBouncerModel", tags: List[str], **kwargs) -> ``` """ - missing_tags = [tag for tag in tags if tag not in model.tags] - assert not missing_tags, f"`{model.name}` is missing required tags: {missing_tags}." + + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_has_tags"] + tags: List[str] + + def execute(self) -> None: + """Execute the check.""" + missing_tags = [tag for tag in self.tags if tag not in self.model.tags] + assert ( + not missing_tags + ), f"`{self.model.name}` is missing required tags: {missing_tags}." class CheckModelHasUniqueTest(BaseCheck): + """Models must have a test for uniqueness of a column. + + Parameters: + accepted_uniqueness_tests (Optional[List[str]]): List of tests that are accepted as uniqueness tests. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + tests (List[DbtBouncerTestBase]): List of DbtBouncerTestBase objects parsed from `manifest.json`. + + Other Parameters: + exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. + include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. + severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. + + Example(s): + ```yaml + manifest_checks: + - name: check_model_has_unique_test + include: ^models/marts + ``` + ```yaml + manifest_checks: + # Example of allowing a custom uniqueness test + - name: check_model_has_unique_test + accepted_uniqueness_tests: + - expect_compound_columns_to_be_unique + - my_custom_uniqueness_test + - unique + ``` + + """ + accepted_uniqueness_tests: Optional[List[str]] = Field( default=[ "expect_compound_columns_to_be_unique", @@ -456,52 +512,12 @@ class CheckModelHasUniqueTest(BaseCheck): "unique", ], ) - model: "DbtBouncerModel" = Field(default=None) + model: "DbtBouncerModelBase" = Field(default=None) name: Literal["check_model_has_unique_test"] - tests: List["DbtBouncerTest"] = Field(default=[]) - - def execute( - # model: "DbtBouncerModel", - # tests: "DbtBouncerTest", - # accepted_uniqueness_tests: List[str] = ( - # [ - # "expect_compound_columns_to_be_unique", - # "dbt_utils.unique_combination_of_columns", - # "unique", - # ] - # ), - # **kwargs, - self, - ) -> None: - """Models must have a test for uniqueness of a column. - - Parameters: - accepted_uniqueness_tests (Optional[List[str]]): List of tests that are accepted as uniqueness tests. - model (DbtBouncerModel): The DbtBouncerModel object to check. - tests (List[DbtBouncerTest]): List of DbtBouncerTest objects parsed from `manifest.json`. - - Other Parameters: - exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. - include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. - severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. - - Example(s): - ```yaml - manifest_checks: - - name: check_model_has_unique_test - include: ^models/marts - ``` - ```yaml - manifest_checks: - # Example of allowing a custom uniqueness test - - name: check_model_has_unique_test - accepted_uniqueness_tests: - - expect_compound_columns_to_be_unique - - my_custom_uniqueness_test - - unique - ``` + tests: List["DbtBouncerTestBase"] = Field(default=[]) - """ + def execute(self) -> None: + """Execute the check.""" num_unique_tests = sum( test.attached_node == self.model.unique_id and test.test_metadata.name in self.accepted_uniqueness_tests # type: ignore[operator] @@ -513,23 +529,14 @@ def execute( class CheckModelHasUnitTests(BaseCheck): - min_number_of_unit_tests: int = Field(default=1) - name: Literal["check_model_has_unit_tests"] - - -def check_model_has_unit_tests( - manifest_obj: "DbtBouncerManifest", - model: "DbtBouncerModel", - unit_tests: List["UnitTests"], - min_number_of_unit_tests: int = 1, - **kwargs, -) -> None: """Models must have more than the specified number of unit tests. Parameters: - manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. min_number_of_unit_tests (Optional[int]): The minimum number of unit tests that a model must have. - model (DbtBouncerModel): The DbtBouncerModel object to check. + + Receives: + manifest_obj (DbtBouncerManifest): The DbtBouncerManifest object parsed from `manifest.json`. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. unit_tests (List[UnitTests]): List of UnitTests objects parsed from `manifest.json`. Other Parameters: @@ -554,48 +561,45 @@ def check_model_has_unit_tests( ``` """ - if semver.Version.parse(manifest_obj.manifest.metadata.dbt_version) >= "1.8.0": - num_unit_tests = len( - [ - t.unique_id - for t in unit_tests - if t.depends_on.nodes[0] == model.unique_id - ], - ) - assert ( - num_unit_tests >= min_number_of_unit_tests - ), f"`{model.name}` has {num_unit_tests} unit tests, this is less than the minimum of {min_number_of_unit_tests}." - else: - logging.warning( - "The `check_model_has_unit_tests` check is only supported for dbt 1.8.0 and above.", - ) - -class CheckModelMaxChainedViews(BaseCheck): - materializations_to_include: List[str] = Field( - default=["ephemeral", "view"], - ) - max_chained_views: int = Field( - default=3, - ) - name: Literal["check_model_max_chained_views"] + manifest_obj: "DbtBouncerManifest" = Field(default=None) + min_number_of_unit_tests: int = Field(default=1) + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_has_unit_tests"] + unit_tests: List["UnitTests"] = Field(default=[]) + + def execute(self) -> None: + """Execute the check.""" + if ( + semver.Version.parse(self.manifest_obj.manifest.metadata.dbt_version) + >= "1.8.0" + ): + num_unit_tests = len( + [ + t.unique_id + for t in self.unit_tests + if t.depends_on.nodes[0] == self.model.unique_id + ], + ) + assert ( + num_unit_tests >= self.min_number_of_unit_tests + ), f"`{self.model.name}` has {num_unit_tests} unit tests, this is less than the minimum of {self.min_number_of_unit_tests}." + else: + logging.warning( + "The `check_model_has_unit_tests` check is only supported for dbt 1.8.0 and above.", + ) -def check_model_max_chained_views( - manifest_obj: "DbtBouncerManifest", - model: "DbtBouncerModel", - models: List["DbtBouncerModel"], - materializations_to_include: List[str] = ["ephemeral", "view"], # noqa: B006 - max_chained_views: int = 3, - **kwargs, -) -> None: +class CheckModelMaxChainedViews(BaseCheck): """Models cannot have more than the specified number of upstream dependents that are not tables. Parameters: materializations_to_include (Optional[List[str]]): List of materializations to include in the check. max_chained_views (Optional[int]): The maximum number of upstream dependents that are not tables. - model (DbtBouncerModel): The DbtBouncerModel object to check. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. + + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -619,83 +623,92 @@ def check_model_max_chained_views( """ - def return_upstream_view_models( - materializations, - max_chained_views, - models, - model_unique_ids_to_check, - package_name, - depth=0, - ): - """Recursive function to return model unique_id's of upstream models that are views. Depth of recursion can be specified. If no models meet the criteria then an empty list is returned. - - Returns - - - List[str]: List of model unique_id's of upstream models that are views. - - """ - if depth == max_chained_views or model_unique_ids_to_check == []: - return model_unique_ids_to_check - - relevant_upstream_models = [] - for model in model_unique_ids_to_check: - upstream_nodes = list( - next(m2 for m2 in models if m2.unique_id == model).depends_on.nodes, - ) - if upstream_nodes != []: - upstream_models = [ - m - for m in upstream_nodes - if m.split(".")[0] == "model" and m.split(".")[1] == package_name - ] - for i in upstream_models: - if ( - next(m for m in models if m.unique_id == i).config.materialized - in materializations - ): - relevant_upstream_models.append(i) - - depth += 1 - return return_upstream_view_models( - materializations=materializations, - max_chained_views=max_chained_views, - models=models, - model_unique_ids_to_check=relevant_upstream_models, - package_name=package_name, - depth=depth, - ) + manifest_obj: "DbtBouncerManifest" = Field(default=None) + materializations_to_include: List[str] = Field( + default=["ephemeral", "view"], + ) + max_chained_views: int = Field( + default=3, + ) + model: "DbtBouncerModelBase" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_model_max_chained_views"] - assert ( - len( - return_upstream_view_models( - materializations=materializations_to_include, + def execute(self) -> None: + """Execute the check.""" + + def return_upstream_view_models( + materializations, + max_chained_views, + models, + model_unique_ids_to_check, + package_name, + depth=0, + ): + """Recursive function to return model unique_id's of upstream models that are views. Depth of recursion can be specified. If no models meet the criteria then an empty list is returned. + + Returns + - + List[str]: List of model unique_id's of upstream models that are views. + + """ + if depth == max_chained_views or model_unique_ids_to_check == []: + return model_unique_ids_to_check + + relevant_upstream_models = [] + for model in model_unique_ids_to_check: + upstream_nodes = list( + next(m2 for m2 in models if m2.unique_id == model).depends_on.nodes, + ) + if upstream_nodes != []: + upstream_models = [ + m + for m in upstream_nodes + if m.split(".")[0] == "model" + and m.split(".")[1] == package_name + ] + for i in upstream_models: + if ( + next( + m for m in models if m.unique_id == i + ).config.materialized + in materializations + ): + relevant_upstream_models.append(i) + + depth += 1 + return return_upstream_view_models( + materializations=materializations, max_chained_views=max_chained_views, models=models, - model_unique_ids_to_check=[model.unique_id], - package_name=manifest_obj.manifest.metadata.project_name, - ), - ) - == 0 - ), f"`{model.name}` has more than {max_chained_views} upstream dependents that are not tables." - + model_unique_ids_to_check=relevant_upstream_models, + package_name=package_name, + depth=depth, + ) -class CheckModelMaxFanout(BaseCheck): - max_downstream_models: int = Field(default=3) - name: Literal["check_model_max_fanout"] + assert ( + len( + return_upstream_view_models( + materializations=self.materializations_to_include, + max_chained_views=self.max_chained_views, + models=self.models, + model_unique_ids_to_check=[self.model.unique_id], + package_name=self.manifest_obj.manifest.metadata.project_name, + ), + ) + == 0 + ), f"`{self.model.name}` has more than {self.max_chained_views} upstream dependents that are not tables." -def check_model_max_fanout( - model: "DbtBouncerModel", - models: List["DbtBouncerModel"], - max_downstream_models: int = 3, - **kwargs, -) -> None: +class CheckModelMaxFanout(BaseCheck): """Models cannot have more than the specified number of downstream models. Parameters: max_downstream_models (Optional[int]): The maximum number of permitted downstream models. - model (DbtBouncerModel): The DbtBouncerModel object to check. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. + + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -710,29 +723,30 @@ def check_model_max_fanout( ``` """ - num_downstream_models = sum(model.unique_id in m.depends_on.nodes for m in models) - assert ( - num_downstream_models <= max_downstream_models - ), f"`{model.name}` has {num_downstream_models} downstream models, which is more than the permitted maximum of {max_downstream_models}." + max_downstream_models: int = Field(default=3) + model: "DbtBouncerModelBase" = Field(default=None) + models: List["DbtBouncerModelBase"] = Field(default=[]) + name: Literal["check_model_max_fanout"] + def execute(self) -> None: + """Execute the check.""" + num_downstream_models = sum( + self.model.unique_id in m.depends_on.nodes for m in self.models + ) -class CheckModelMaxNumberOfLines(BaseCheck): - name: Literal["check_model_max_number_of_lines"] - max_number_of_lines: int = Field(default=100) + assert ( + num_downstream_models <= self.max_downstream_models + ), f"`{self.model.name}` has {num_downstream_models} downstream models, which is more than the permitted maximum of {self.max_downstream_models}." -def check_model_max_number_of_lines( - model: "DbtBouncerModel", - max_number_of_lines: int = 100, - **kwargs, -) -> None: +class CheckModelMaxNumberOfLines(BaseCheck): """Models may not have more than the specified number of lines. Parameters: max_number_of_lines (int): The maximum number of permitted lines. - model (DbtBouncerModel): The DbtBouncerModel object to check. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -751,40 +765,30 @@ def check_model_max_number_of_lines( ``` """ - actual_number_of_lines = model.raw_code.count("\n") + 1 - assert ( - actual_number_of_lines <= max_number_of_lines - ), f"`{model.name}` has {actual_number_of_lines} lines, this is more than the maximum permitted number of lines ({max_number_of_lines})." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_max_number_of_lines"] + max_number_of_lines: int = Field(default=100) + def execute(self) -> None: + """Execute the check.""" + actual_number_of_lines = self.model.raw_code.count("\n") + 1 -class CheckModelMaxUpstreamDependencies(BaseCheck): - max_upstream_macros: int = Field( - default=5, - ) - max_upstream_models: int = Field( - default=5, - ) - max_upstream_sources: int = Field( - default=1, - ) - name: Literal["check_model_max_upstream_dependencies"] + assert ( + actual_number_of_lines <= self.max_number_of_lines + ), f"`{self.model.name}` has {actual_number_of_lines} lines, this is more than the maximum permitted number of lines ({self.max_number_of_lines})." -def check_model_max_upstream_dependencies( - model: "DbtBouncerModel", - max_upstream_macros: int = 5, - max_upstream_models: int = 5, - max_upstream_sources: int = 1, - **kwargs, -) -> None: +class CheckModelMaxUpstreamDependencies(BaseCheck): """Limit the number of upstream dependencies a model has. Parameters: max_upstream_macros (Optional[int]): The maximum number of permitted upstream macros. max_upstream_models (Optional[int]): The maximum number of permitted upstream models. max_upstream_sources (Optional[int]): The maximum number of permitted upstream sources. - model (DbtBouncerModel): The DbtBouncerModel object to check. + + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -799,43 +803,49 @@ def check_model_max_upstream_dependencies( ``` """ - num_upstream_macros = len(list(model.depends_on.macros)) - num_upstream_models = len( - [m for m in model.depends_on.nodes if m.split(".")[0] == "model"], + + max_upstream_macros: int = Field( + default=5, ) - num_upstream_sources = len( - [m for m in model.depends_on.nodes if m.split(".")[0] == "source"], + max_upstream_models: int = Field( + default=5, ) + max_upstream_sources: int = Field( + default=1, + ) + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_max_upstream_dependencies"] - assert ( - num_upstream_macros <= max_upstream_macros - ), f"`{model.name}` has {num_upstream_macros} upstream macros, which is more than the permitted maximum of {max_upstream_macros}." - assert ( - num_upstream_models <= max_upstream_models - ), f"`{model.name}` has {num_upstream_models} upstream models, which is more than the permitted maximum of {max_upstream_models}." - assert ( - num_upstream_sources <= max_upstream_sources - ), f"`{model.name}` has {num_upstream_sources} upstream sources, which is more than the permitted maximum of {max_upstream_sources}." - - -class CheckModelNames(BaseCheck): - model_config = ConfigDict(extra="forbid", protected_namespaces=()) + def execute(self) -> None: + """Execute the check.""" + num_upstream_macros = len(list(self.model.depends_on.macros)) + num_upstream_models = len( + [m for m in self.model.depends_on.nodes if m.split(".")[0] == "model"], + ) + num_upstream_sources = len( + [m for m in self.model.depends_on.nodes if m.split(".")[0] == "source"], + ) - name: Literal["check_model_names"] - model_name_pattern: str + assert ( + num_upstream_macros <= self.max_upstream_macros + ), f"`{self.model.name}` has {num_upstream_macros} upstream macros, which is more than the permitted maximum of {self.max_upstream_macros}." + assert ( + num_upstream_models <= self.max_upstream_models + ), f"`{self.model.name}` has {num_upstream_models} upstream models, which is more than the permitted maximum of {self.max_upstream_models}." + assert ( + num_upstream_sources <= self.max_upstream_sources + ), f"`{self.model.name}` has {num_upstream_sources} upstream sources, which is more than the permitted maximum of {self.max_upstream_sources}." -def check_model_names( - model: "DbtBouncerModel", - model_name_pattern: str, - **kwargs, -) -> None: +class CheckModelNames(BaseCheck): """Models must have a name that matches the supplied regex. Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. model_name_pattern (str): Regexp the model name must match. + Receives: + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. + Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked. @@ -853,20 +863,26 @@ def check_model_names( ``` """ - assert ( - re.compile(model_name_pattern.strip()).match(model.name) is not None - ), f"`{model.name}` does not match the supplied regex `{model_name_pattern.strip()})`." + model_config = ConfigDict(extra="forbid", protected_namespaces=()) -class CheckModelPropertyFileLocation(BaseCheck): - name: Literal["check_model_property_file_location"] + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_names"] + model_name_pattern: str + def execute(self) -> None: + """Execute the check.""" + assert ( + re.compile(self.model_name_pattern.strip()).match(self.model.name) + is not None + ), f"`{self.model.name}` does not match the supplied regex `{self.model_name_pattern.strip()})`." -def check_model_property_file_location(model: "DbtBouncerModel", **kwargs) -> None: + +class CheckModelPropertyFileLocation(BaseCheck): """Model properties files must follow the guidance provided by dbt [here](https://docs.getdbt.com/best-practices/how-we-structure/1-guide-overview). Parameters: - model (DbtBouncerModel): The DbtBouncerModel object to check. + model (DbtBouncerModelBase): The DbtBouncerModelBase object to check. Other Parameters: exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked. @@ -880,56 +896,38 @@ def check_model_property_file_location(model: "DbtBouncerModel", **kwargs) -> No ``` """ - expected_substr = ( - "_".join(model.original_file_path.split("/")[1:-1]) - .replace("staging", "stg") - .replace("intermediate", "int") - .replace("marts", "") - ) - properties_yml_name = model.patch_path.split("/")[-1] - - assert properties_yml_name.startswith( - "_", - ), f"The properties file for `{model.name}` (`{properties_yml_name}`) does not start with an underscore." - assert ( - expected_substr in properties_yml_name - ), f"The properties file for `{model.name}` (`{properties_yml_name}`) does not contain the expected substring (`{expected_substr}`)." - assert properties_yml_name.endswith( - "__models.yml", - ), f"The properties file for `{model.name}` (`{properties_yml_name}`) does not end with `__models.yml`." + model: "DbtBouncerModelBase" = Field(default=None) + name: Literal["check_model_property_file_location"] -class CheckModelsTestCoverage(BaseModel): - model_config = ConfigDict(extra="forbid") + def execute(self) -> None: + """Execute the check.""" + expected_substr = ( + "_".join(self.model.original_file_path.split("/")[1:-1]) + .replace("staging", "stg") + .replace("intermediate", "int") + .replace("marts", "") + ) + properties_yml_name = self.model.patch_path.split("/")[-1] - index: Optional[int] = Field( - default=None, - description="Index to uniquely identify the check, calculated at runtime.", - ) - name: Literal["check_model_test_coverage"] - min_model_test_coverage_pct: float = Field( - default=100, - ge=0, - le=100, - ) - severity: Optional[Literal["error", "warn"]] = Field( - default="error", - description="Severity of the check, one of 'error' or 'warn'.", - ) + assert properties_yml_name.startswith( + "_", + ), f"The properties file for `{self.model.name}` (`{properties_yml_name}`) does not start with an underscore." + assert ( + expected_substr in properties_yml_name + ), f"The properties file for `{self.model.name}` (`{properties_yml_name}`) does not contain the expected substring (`{expected_substr}`)." + assert properties_yml_name.endswith( + "__models.yml", + ), f"The properties file for `{self.model.name}` (`{properties_yml_name}`) does not end with `__models.yml`." -def check_model_test_coverage( - models: List["DbtBouncerModel"], - tests: List["DbtBouncerTest"], - min_model_test_coverage_pct: float = 100, - **kwargs, -) -> None: +class CheckModelsTestCoverage(BaseModel): """Set the minimum percentage of models that have at least one test. Parameters: min_model_test_coverage_pct (float): The minimum percentage of models that must have at least one test. - models (List[DbtBouncerModel]): List of DbtBouncerModel objects parsed from `manifest.json`. - tests (List[DbtBouncerTest]): List of DbtBouncerTest objects parsed from `manifest.json`. + models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. + tests (List[DbtBouncerTestBase]): List of DbtBouncerTestBase objects parsed from `manifest.json`. Other Parameters: severity (Optional[Literal["error", "warn"]]): Severity level of the check. Default: `error`. @@ -943,15 +941,37 @@ def check_model_test_coverage( ``` """ - num_models = len(models) - models_with_tests = [] - for model in models: - for test in tests: - if model.unique_id in test.depends_on.nodes: - models_with_tests.append(model.unique_id) - num_models_with_tests = len(set(models_with_tests)) - model_test_coverage_pct = (num_models_with_tests / num_models) * 100 - - assert ( - model_test_coverage_pct >= min_model_test_coverage_pct - ), f"Only {model_test_coverage_pct}% of models have at least one test, this is less than the permitted minimum of {min_model_test_coverage_pct}%." + + model_config = ConfigDict(extra="forbid") + + index: Optional[int] = Field( + default=None, + description="Index to uniquely identify the check, calculated at runtime.", + ) + name: Literal["check_model_test_coverage"] + min_model_test_coverage_pct: float = Field( + default=100, + ge=0, + le=100, + ) + models: List["DbtBouncerModelBase"] = Field(default=[]) + severity: Optional[Literal["error", "warn"]] = Field( + default="error", + description="Severity of the check, one of 'error' or 'warn'.", + ) + tests: List["DbtBouncerTestBase"] = Field(default=[]) + + def execute(self) -> None: + """Execute the check.""" + num_models = len(self.models) + models_with_tests = [] + for model in self.models: + for test in self.tests: + if model.unique_id in test.depends_on.nodes: + models_with_tests.append(model.unique_id) + num_models_with_tests = len(set(models_with_tests)) + model_test_coverage_pct = (num_models_with_tests / num_models) * 100 + + assert ( + model_test_coverage_pct >= self.min_model_test_coverage_pct + ), f"Only {model_test_coverage_pct}% of models have at least one test, this is less than the permitted minimum of {self.min_model_test_coverage_pct}%." diff --git a/tests/unit/checks/manifest/test_models.py b/tests/unit/checks/manifest/test_models.py index 7eda1fdc..90ccde3f 100644 --- a/tests/unit/checks/manifest/test_models.py +++ b/tests/unit/checks/manifest/test_models.py @@ -11,30 +11,60 @@ UnitTests, ) + from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerManifest, + DbtBouncerModel, + DbtBouncerModelBase, + DbtBouncerTest, + DbtBouncerTestBase, + ) + from dbt_bouncer.checks.manifest.check_models import ( - check_model_access, - check_model_code_does_not_contain_regexp_pattern, - check_model_contract_enforced_for_public_model, - check_model_depends_on_multiple_sources, - check_model_description_populated, - check_model_directories, - check_model_documentation_coverage, - check_model_documented_in_same_directory, - check_model_has_contracts_enforced, - check_model_has_meta_keys, - check_model_has_no_upstream_dependencies, - check_model_has_tags, - check_model_has_unique_test, - check_model_has_unit_tests, - check_model_max_chained_views, - check_model_max_fanout, - check_model_max_number_of_lines, - check_model_max_upstream_dependencies, - check_model_names, - check_model_property_file_location, - check_model_test_coverage, + CheckModelAccess, + CheckModelCodeDoesNotContainRegexpPattern, + CheckModelContractsEnforcedForPublicModel, + CheckModelDependsOnMultipleSources, + CheckModelDescriptionPopulated, + CheckModelDirectories, + CheckModelDocumentedInSameDirectory, + CheckModelHasContractsEnforced, + CheckModelHasMetaKeys, + CheckModelHasNoUpstreamDependencies, + CheckModelHasTags, + CheckModelHasUniqueTest, + CheckModelHasUnitTests, + CheckModelMaxChainedViews, + CheckModelMaxFanout, + CheckModelMaxNumberOfLines, + CheckModelMaxUpstreamDependencies, + CheckModelNames, + CheckModelPropertyFileLocation, + CheckModelsDocumentationCoverage, + CheckModelsTestCoverage, ) +CheckModelAccess.model_rebuild() +CheckModelCodeDoesNotContainRegexpPattern.model_rebuild() +CheckModelContractsEnforcedForPublicModel.model_rebuild() +CheckModelDependsOnMultipleSources.model_rebuild() +CheckModelDescriptionPopulated.model_rebuild() +CheckModelDirectories.model_rebuild() +CheckModelsDocumentationCoverage.model_rebuild() +CheckModelDocumentedInSameDirectory.model_rebuild() +CheckModelHasContractsEnforced.model_rebuild() +CheckModelHasMetaKeys.model_rebuild() +CheckModelHasNoUpstreamDependencies.model_rebuild() +CheckModelHasTags.model_rebuild() +CheckModelHasUniqueTest.model_rebuild() +CheckModelHasUnitTests.model_rebuild() +CheckModelMaxChainedViews.model_rebuild() +CheckModelMaxFanout.model_rebuild() +CheckModelMaxNumberOfLines.model_rebuild() +CheckModelMaxUpstreamDependencies.model_rebuild() +CheckModelNames.model_rebuild() +CheckModelPropertyFileLocation.model_rebuild() +CheckModelsTestCoverage.model_rebuild() + @pytest.mark.parametrize( ("access", "model", "expectation"), @@ -95,10 +125,9 @@ ) def test_check_model_access(access, model, expectation): with expectation: - check_model_access( - access=access, - model=model, - ) + CheckModelAccess( + access=access, model=model, name="check_model_access" + ).execute() @pytest.mark.parametrize( @@ -186,9 +215,9 @@ def test_check_model_access(access, model, expectation): ) def test_check_model_contract_enforced_for_public_model(model, expectation): with expectation: - check_model_contract_enforced_for_public_model( - model=model, - ) + CheckModelContractsEnforcedForPublicModel( + model=model, name="check_model_contract_enforced_for_public_model" + ).execute() @pytest.mark.parametrize( @@ -253,9 +282,9 @@ def test_check_model_contract_enforced_for_public_model(model, expectation): ) def test_check_model_depends_on_multiple_sources(model, expectation): with expectation: - check_model_depends_on_multiple_sources( - model=model, - ) + CheckModelDependsOnMultipleSources( + model=model, name="check_model_depends_on_multiple_sources" + ).execute() @pytest.mark.parametrize( @@ -375,10 +404,11 @@ def test_check_model_documentation_coverage( expectation, ): with expectation: - check_model_documentation_coverage( + CheckModelsDocumentationCoverage( min_model_documentation_coverage_pct=min_model_documentation_coverage_pct, models=models, - ) + name="check_model_documentation_coverage", + ).execute() @pytest.mark.parametrize( @@ -438,9 +468,9 @@ def test_check_model_documentation_coverage( ) def test_check_model_documented_in_same_directory(model, expectation): with expectation: - check_model_documented_in_same_directory( - model=model, - ) + CheckModelDocumentedInSameDirectory( + model=model, name="check_model_documented_in_same_directory" + ).execute() @pytest.mark.parametrize( @@ -500,9 +530,9 @@ def test_check_model_documented_in_same_directory(model, expectation): ) def test_check_model_has_contracts_enforced(model, expectation): with expectation: - check_model_has_contracts_enforced( - model=model, - ) + CheckModelHasContractsEnforced( + model=model, name="check_model_has_contracts_enforced" + ).execute() @pytest.mark.parametrize( @@ -671,10 +701,9 @@ def test_check_model_has_contracts_enforced(model, expectation): ) def test_check_model_has_meta_keys(keys, model, expectation): with expectation: - check_model_has_meta_keys( - keys=keys, - model=model, - ) + CheckModelHasMetaKeys( + keys=keys, model=model, name="check_model_has_meta_keys" + ).execute() @pytest.mark.parametrize( @@ -759,9 +788,9 @@ def test_check_model_has_meta_keys(keys, model, expectation): ) def test_check_model_has_no_upstream_dependencies(model, expectation): with expectation: - check_model_has_no_upstream_dependencies( - model=model, - ) + CheckModelHasNoUpstreamDependencies( + model=model, name="check_model_has_no_upstream_dependencies" + ).execute() @pytest.mark.parametrize( @@ -825,10 +854,11 @@ def test_check_model_has_no_upstream_dependencies(model, expectation): ) def test_check_model_has_tags(model, tags, expectation): with expectation: - check_model_has_tags( + CheckModelHasTags( model=model, + name="check_model_has_tags", tags=tags, - ) + ).execute() @pytest.mark.parametrize( @@ -1022,11 +1052,12 @@ def test_check_model_has_unique_test( expectation, ): with expectation: - check_model_has_unique_test( + CheckModelHasUniqueTest( accepted_uniqueness_tests=accepted_uniqueness_tests, model=model, + name="check_model_has_unique_test", tests=tests, - ) + ).execute() @pytest.mark.parametrize( @@ -1149,12 +1180,13 @@ def test_check_model_has_unit_tests( expectation, ): with expectation: - check_model_has_unit_tests( + CheckModelHasUnitTests( manifest_obj=manifest_obj, min_number_of_unit_tests=min_number_of_unit_tests, model=model, + name="check_model_has_unit_tests", unit_tests=unit_tests, - ) + ).execute() @pytest.mark.parametrize( @@ -1220,10 +1252,11 @@ def test_check_model_code_does_not_contain_regexp_pattern( expectation, ): with expectation: - check_model_code_does_not_contain_regexp_pattern( + CheckModelCodeDoesNotContainRegexpPattern( model=model, + name="check_model_code_does_not_contain_regexp_pattern", regexp_pattern=regexp_pattern, - ) + ).execute() @pytest.mark.parametrize( @@ -1316,11 +1349,12 @@ def test_check_model_directories( expectation, ): with expectation: - check_model_directories( + CheckModelDirectories( include=include, model=model, + name="check_model_directories", permitted_sub_directories=permitted_sub_directories, - ) + ).execute() @pytest.mark.parametrize( @@ -1576,13 +1610,14 @@ def test_check_model_max_chained_views( expectation, ): with expectation: - check_model_max_chained_views( + CheckModelMaxChainedViews( manifest_obj=manifest_obj, materializations_to_include=materializations_to_include, max_chained_views=max_chained_views, model=model, models=models, - ) + name="check_model_max_chained_views", + ).execute() @pytest.mark.parametrize( @@ -1748,11 +1783,12 @@ def test_check_model_max_chained_views( ) def test_check_mode_names(include, model_name_pattern, model, expectation): with expectation: - check_model_names( + CheckModelNames( include=include, model_name_pattern=model_name_pattern, model=model, - ) + name="check_model_names", + ).execute() @pytest.mark.parametrize( @@ -1894,11 +1930,12 @@ def test_check_mode_names(include, model_name_pattern, model, expectation): ) def test_check_model_max_fanout(max_downstream_models, model, models, expectation): with expectation: - check_model_max_fanout( + CheckModelMaxFanout( max_downstream_models=max_downstream_models, model=model, models=models, - ) + name="check_model_max_fanout", + ).execute() @pytest.mark.parametrize( @@ -1962,10 +1999,11 @@ def test_check_model_max_fanout(max_downstream_models, model, models, expectatio ) def test_check_model_max_number_of_lines(max_number_of_lines, model, expectation): with expectation: - check_model_max_number_of_lines( + CheckModelMaxNumberOfLines( max_number_of_lines=max_number_of_lines, model=model, - ) + name="check_model_max_number_of_lines", + ).execute() @pytest.mark.parametrize( @@ -2177,12 +2215,13 @@ def test_check_model_max_upstream_dependencies( expectation, ): with expectation: - check_model_max_upstream_dependencies( + CheckModelMaxUpstreamDependencies( max_upstream_macros=max_upstream_macros, max_upstream_models=max_upstream_models, max_upstream_sources=max_upstream_sources, model=model, - ) + name="check_model_max_upstream_dependencies", + ).execute() @pytest.mark.parametrize( @@ -2342,9 +2381,9 @@ def test_check_model_max_upstream_dependencies( ) def test_check_model_property_file_location(model, expectation): with expectation: - check_model_property_file_location( - model=model, - ) + CheckModelPropertyFileLocation( + model=model, name="check_model_property_file_location" + ).execute() @pytest.mark.parametrize( @@ -2533,9 +2572,9 @@ def test_check_model_property_file_location(model, expectation): ) def test_check_model_description_populated(model, expectation): with expectation: - check_model_description_populated( - model=model, - ) + CheckModelDescriptionPopulated( + model=model, name="check_model_description_populated" + ).execute() @pytest.mark.parametrize( @@ -2766,8 +2805,9 @@ def test_check_model_test_coverage( expectation, ): with expectation: - check_model_test_coverage( + CheckModelsTestCoverage( min_model_test_coverage_pct=min_model_test_coverage_pct, models=models, + name="check_model_test_coverage", tests=tests, - ) + ).execute() From 3578b511d72d3c1399c532c5a303f7334e597b36 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 16:47:58 +0200 Subject: [PATCH 15/17] Removing warning from tests --- .../checks/manifest/check_exposures.py | 11 +++++----- src/dbt_bouncer/config_file_validator.py | 22 +++++++++---------- src/dbt_bouncer/parsers.py | 17 ++++++++++++++ src/dbt_bouncer/runner.py | 14 +++++++----- tests/unit/checks/manifest/test_exposures.py | 6 ++++- .../checks/manifest/test_semantic_models.py | 7 +++--- tests/unit/test_runner.py | 21 ++++++++++++++++-- 7 files changed, 70 insertions(+), 28 deletions(-) diff --git a/src/dbt_bouncer/checks/manifest/check_exposures.py b/src/dbt_bouncer/checks/manifest/check_exposures.py index de2c4efe..f20abee7 100644 --- a/src/dbt_bouncer/checks/manifest/check_exposures.py +++ b/src/dbt_bouncer/checks/manifest/check_exposures.py @@ -12,15 +12,14 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Exposures - from dbt_bouncer.parsers import DbtBouncerModelBase + from dbt_bouncer.parsers import DbtBouncerExposureBase, DbtBouncerModelBase class CheckExposureOnNonPublicModels(BaseCheck): """Exposures should be based on public models only. Receives: - exposure (Exposures): The Exposures object to check. + exposure (DbtBouncerExposureBase): The DbtBouncerExposureBase object to check. models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: @@ -36,7 +35,7 @@ class CheckExposureOnNonPublicModels(BaseCheck): """ - exposure: "Exposures" = Field(default=None) + exposure: "DbtBouncerExposureBase" = Field(default=None) models: List["DbtBouncerModelBase"] = Field(default=[]) name: Literal["check_exposure_based_on_non_public_models"] @@ -62,7 +61,7 @@ class CheckExposureOnView(BaseCheck): materializations_to_include (Optional[List[str]]): List of materializations to include in the check. Receives: - exposure (Exposures): The Exposures object to check. + exposure (DbtBouncerExposureBase): The DbtBouncerExposureBase object to check. models (List[DbtBouncerModelBase]): List of DbtBouncerModelBase objects parsed from `manifest.json`. Other Parameters: @@ -86,7 +85,7 @@ class CheckExposureOnView(BaseCheck): """ - exposure: "Exposures" = Field(default=None) + exposure: "DbtBouncerExposureBase" = Field(default=None) materializations_to_include: List[str] = Field( default=["ephemeral", "view"], ) diff --git a/src/dbt_bouncer/config_file_validator.py b/src/dbt_bouncer/config_file_validator.py index c48d5fba..dc356261 100644 --- a/src/dbt_bouncer/config_file_validator.py +++ b/src/dbt_bouncer/config_file_validator.py @@ -181,16 +181,22 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": # Rebuild the model to ensure all fields are present import warnings - from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( - Exposures, # noqa: F401 - Macros, # noqa: F401 - UnitTests, # noqa: F401 - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + from dbt_artifacts_parser.parsers.catalog.catalog_v1 import ( + CatalogTable, # noqa: F401 + ) + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( + Exposures, # noqa: F401 + Macros, # noqa: F401 + UnitTests, # noqa: F401 + ) import dbt_bouncer.checks # noqa: F401 from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.parsers import ( # noqa: F401 DbtBouncerCatalogNode, + DbtBouncerExposureBase, DbtBouncerManifest, DbtBouncerModel, DbtBouncerModelBase, @@ -204,12 +210,6 @@ def validate_conf(config_file_contents: Dict[str, Any]) -> "DbtBouncerConf": DbtBouncerTestBase, ) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - from dbt_artifacts_parser.parsers.catalog.catalog_v1 import ( - CatalogTable, # noqa: F401 - ) - DbtBouncerConf.model_rebuild() return DbtBouncerConf(**config_file_contents) diff --git a/src/dbt_bouncer/parsers.py b/src/dbt_bouncer/parsers.py index 9d360889..9416b580 100644 --- a/src/dbt_bouncer/parsers.py +++ b/src/dbt_bouncer/parsers.py @@ -17,6 +17,9 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogTable, CatalogV1 + from dbt_artifacts_parser.parsers.manifest.manifest_v10 import ( + Exposure as Exposure_v10, + ) from dbt_artifacts_parser.parsers.manifest.manifest_v10 import ( GenericTestNode as GenericTestNode_v10, ) @@ -30,6 +33,9 @@ from dbt_artifacts_parser.parsers.manifest.manifest_v10 import ( SourceDefinition as SourceDefinition_v10, ) + from dbt_artifacts_parser.parsers.manifest.manifest_v11 import ( + Exposure as Exposure_v11, + ) from dbt_artifacts_parser.parsers.manifest.manifest_v11 import ( GenericTestNode as GenericTestNode_v11, ) @@ -81,6 +87,17 @@ class DbtBouncerManifest(BaseModel): manifest: Union[ManifestV10, ManifestV11, ManifestV12] +DbtBouncerExposureBase = Union[Exposure_v10, Exposure_v11, Exposures] + + +class DbtBouncerExposure(BaseModel): + """Model for all exposure nodes in `manifest.json`.""" + + model: DbtBouncerExposureBase + original_file_path: str + unique_id: str + + DbtBouncerModelBase = Union[ModelNode_v10, ModelNode_v11, Nodes4] diff --git a/src/dbt_bouncer/runner.py b/src/dbt_bouncer/runner.py index cf075e22..815a52ad 100644 --- a/src/dbt_bouncer/runner.py +++ b/src/dbt_bouncer/runner.py @@ -20,11 +20,15 @@ ) if TYPE_CHECKING: - from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( - Exposures, - Macros, - UnitTests, - ) + import warnings + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( + Exposures, + Macros, + UnitTests, + ) from dbt_bouncer.config_file_validator import DbtBouncerConf from dbt_bouncer.parsers import ( diff --git a/tests/unit/checks/manifest/test_exposures.py b/tests/unit/checks/manifest/test_exposures.py index 3e817461..3007f7a6 100644 --- a/tests/unit/checks/manifest/test_exposures.py +++ b/tests/unit/checks/manifest/test_exposures.py @@ -11,7 +11,11 @@ CheckExposureOnNonPublicModels, CheckExposureOnView, ) -from dbt_bouncer.parsers import DbtBouncerModel, DbtBouncerModelBase # noqa: F401 +from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerExposureBase, + DbtBouncerModel, + DbtBouncerModelBase, +) CheckExposureOnNonPublicModels.model_rebuild() CheckExposureOnView.model_rebuild() diff --git a/tests/unit/checks/manifest/test_semantic_models.py b/tests/unit/checks/manifest/test_semantic_models.py index 28d9e554..6b77ce31 100644 --- a/tests/unit/checks/manifest/test_semantic_models.py +++ b/tests/unit/checks/manifest/test_semantic_models.py @@ -5,9 +5,10 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4 - -from dbt_artifacts_parser.parsers.manifest.manifest_v12 import SemanticModels + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( + Nodes4, + SemanticModels, + ) from dbt_bouncer.checks.manifest.check_semantic_models import ( CheckSemanticModelOnNonPublicModels, diff --git a/tests/unit/test_runner.py b/tests/unit/test_runner.py index 5ea91c93..8e8f3381 100644 --- a/tests/unit/test_runner.py +++ b/tests/unit/test_runner.py @@ -5,7 +5,15 @@ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) from dbt_artifacts_parser.parser import parse_manifest - from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4 + from dbt_artifacts_parser.parsers.catalog.catalog_v1 import ( + CatalogTable, # noqa: F401 + ) + from dbt_artifacts_parser.parsers.manifest.manifest_v12 import ( # noqa: F401 + Exposures, + Macros, + Nodes4, + UnitTests, + ) from unittest.mock import MagicMock @@ -14,7 +22,16 @@ from dbt_bouncer.checks.common import NestedDict # noqa: F401 from dbt_bouncer.config_file_validator import DbtBouncerConf from dbt_bouncer.logger import configure_console_logging -from dbt_bouncer.parsers import DbtBouncerModel +from dbt_bouncer.parsers import ( # noqa: F401 + DbtBouncerExposureBase, + DbtBouncerManifest, + DbtBouncerModel, + DbtBouncerModelBase, + DbtBouncerRunResultBase, + DbtBouncerSemanticModelBase, + DbtBouncerSourceBase, + DbtBouncerTestBase, +) from dbt_bouncer.runner import runner DbtBouncerConf.model_rebuild() From 2621fc9e7e97e961c1a0b0c1d10a48630d43d15f Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 16:59:40 +0200 Subject: [PATCH 16/17] Minor fixes for MKdocs --- dbt-bouncer-example.yml | 6 +++--- .../templates/python/material/docstring/receives.html.jinja | 4 ++-- mkdocs.yml | 1 - src/dbt_bouncer/checks/catalog/check_columns.py | 2 +- src/dbt_bouncer/checks/run_results/check_run_results.py | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/dbt-bouncer-example.yml b/dbt-bouncer-example.yml index 33faa91f..620bf448 100644 --- a/dbt-bouncer-example.yml +++ b/dbt-bouncer-example.yml @@ -1,7 +1,7 @@ dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artifacts exists, generally the `target` directory inside a dbt project. Defaults to `./target`. -# # exclude: ^models/staging # Optional, here for demonstration purposes only -# # include: ^models/marts # Optional, here for demonstration purposes only -# # severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. +# exclude: ^models/staging # Optional, here for demonstration purposes only +# include: ^models/marts # Optional, here for demonstration purposes only +# severity: warn # Optional, useful when applying `dbt-bouncer` to an existing dbt project. catalog_checks: - name: check_column_description_populated diff --git a/docs/templates/python/material/docstring/receives.html.jinja b/docs/templates/python/material/docstring/receives.html.jinja index a3bb29ca..bb3f4460 100644 --- a/docs/templates/python/material/docstring/receives.html.jinja +++ b/docs/templates/python/material/docstring/receives.html.jinja @@ -4,7 +4,7 @@ - {% if name_column %}{% endif %} + @@ -12,7 +12,7 @@ {% for receives in section.value %} - {% if name_column %}{% endif %} +
{{ lang.t("Name") }}{{ lang.t("Name") }} {{ lang.t("Type") }} {{ lang.t("Description") }}
{% if receives.name %}{{ receives.name }}{% endif %}{% if receives.name %}{{ receives.name }}{% endif %} {% if receives.annotation %} {% with expression = receives.annotation %} diff --git a/mkdocs.yml b/mkdocs.yml index 0fb9abdb..e3c5dcce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -62,7 +62,6 @@ plugins: - src/dbt_bouncer/checks options: docstring_style: google - # filters: ["check_"] filters: ["Check"] group_by_category: true heading_level: 1 diff --git a/src/dbt_bouncer/checks/catalog/check_columns.py b/src/dbt_bouncer/checks/catalog/check_columns.py index 77b25f45..fa07b280 100644 --- a/src/dbt_bouncer/checks/catalog/check_columns.py +++ b/src/dbt_bouncer/checks/catalog/check_columns.py @@ -62,7 +62,7 @@ class CheckColumnNameCompliesToColumnType(BaseCheck): """Columns with specified data types must comply to the specified regexp naming pattern. Parameters: - column_name_pattern: (str): Regex pattern to match the model name. + column_name_pattern (str): Regex pattern to match the model name. types (List[str]): List of data types to check. Receives: diff --git a/src/dbt_bouncer/checks/run_results/check_run_results.py b/src/dbt_bouncer/checks/run_results/check_run_results.py index 5c81eeb7..e66881e9 100644 --- a/src/dbt_bouncer/checks/run_results/check_run_results.py +++ b/src/dbt_bouncer/checks/run_results/check_run_results.py @@ -38,7 +38,7 @@ class CheckRunResultsMaxGigabytesBilled(BaseCheck): ```yaml run_results_checks: - name: check_run_results_max_gigabytes_billed - max_gigabytes_billed: 100 + max_gigabytes_billed: 100 ``` """ From b116ebcb9bb999365682631d05e57085d74324c5 Mon Sep 17 00:00:00 2001 From: pgoslatara Date: Mon, 2 Sep 2024 21:21:23 +0200 Subject: [PATCH 17/17] Updating docs on adding new checks --- docs/CONTRIBUTING.md | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 62e7cba9..f235d8fd 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -146,20 +146,14 @@ To add a new check follow the below steps: 1. In `./src/dbt_bouncer/checks` choose the appropriate directory for your check. For example, if your check only requires the `manifest.json` then use the `manifest` directory, if your check requires the `catalog.json` then use the `catalog` directory. 1. Within the chosen directory assess if a suitable file already exists. For example, if your check applies to a model then `manifest/check_models.py` is a suitable location. -1. Within the chosen file, add both a class and a function: - - - `class`: The class is a pydantic model defining the input arguments and must meet the following criteria: - - Start with "Check". - - Inherit from `dbt_bouncer.conf_validator_base.BaseCheck`. - - Have a `name` attribute that is a string. - - Not use `description` in a `Field`. - - A `default` value provided for optional input arguments. - - - `function`: The function must meet the following criteria: - - Be called after the snake case equivalent of the `name` attribute of the created class. - - Accept `**kwargs`. - - Have a doc string that includes a description of the check, a list of possible input parameters and at least one example. - - A clear message in the event of a failure. +1. Within the chosen file, add a Pydantic model, this object must meet the following criteria: + + - Start with "Check". + - Inherit from `dbt_bouncer.check_base.BaseCheck`. + - Have a `name` attribute that is a string whose value is the snake case equivalent of the class name. + - A `default` value provided for optional input arguments and arguments that are received at execution time. + - Have a doc string that includes a description of the check, a list of possible input parameters and at least one example. + - A clear message in the event of a failure. 1. After the check is added, add the check to `dbt-bouncer-example.yml` and run `dbt-bouncer --config-file dbt-bouncer-example.yml` to ensure the check succeeds. 1. (Optional) If the dbt project located in `./dbt_project` needs to be updated then do so and also run `make build-artifacts` to generate the new test artifacts.