Skip to content

Commit

Permalink
Allow rules to be skipped (#66)
Browse files Browse the repository at this point in the history
Solves #48 (partially?)

Introduce `ModelFilter`, which allows `Rule` objects to be skipped, by
filtering on a `Model`. Adding filters is done in a similar way as
rules, and can be configured using the `pyproject.toml` configuration.

---------

Co-authored-by: Jochem van Dooren <[email protected]>
Co-authored-by: Jochem van Dooren <[email protected]>
  • Loading branch information
3 people authored Aug 15, 2024
1 parent 05f29a6 commit af26d7b
Show file tree
Hide file tree
Showing 19 changed files with 411 additions and 24 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to

## [Unreleased]

- Add model filters to let models be ignored by certain rules.

## [0.4.0] - 2024-08-08

- Add null check before calling `project_evaluated` in the `evaluate` method to
Expand Down
3 changes: 3 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ Every rule can be configured with the following option:
- `severity`: The severity of the rule. Rules have a default severity and can be
overridden. It's an integer with a minimum value of 1 and a maximum value
of 4.
- `model_filter_names`: Filters used by the rule. Takes a list of names that can
be found in the same namespace as the rules (see
[Package rules](package_rules.md)).

Some rules have additional configuration options, e.g.
[sql_has_reasonable_number_of_lines](rules/generic.md#sql_has_reasonable_number_of_lines).
Expand Down
39 changes: 39 additions & 0 deletions docs/create_rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,42 @@ def sql_has_reasonable_number_of_lines(model: Model, max_lines: int = 200) -> Ru
message=f"SQL query too long: {count_lines} lines (> {max_lines})."
)
```

### Filtering models

Custom and standard rules can be configured to have model filters. Filters allow
models to be ignored by one or multiple rules.

Filters are created using the same discovery mechanism and interface as custom
rules, except they do not accept parameters. Similar to Python's built-in
`filter` function, when the filter evaluation returns `True` the model should be
evaluated, otherwise it should be ignored.

```python
from dbt_score import Model, ModelFilter, model_filter

@model_filter
def only_schema_x(model: Model) -> bool:
"""Only applies a rule to schema X."""
return model.schema.lower() == 'x'

class SkipSchemaY(ModelFilter):
description = "Applies a rule to every schema but Y."
def evaluate(self, model: Model) -> bool:
return model.schema.lower() != 'y'
```

Similar to setting a rule severity, standard rules can have filters set in the
[configuration file](configuration.md#tooldbt-scorerulesrule_namespacerule_name),
while custom rules accept either the configuration file or the `model_filters` decorator parameter.

```python
from dbt_score import Model, rule, RuleViolation
from my_project import only_schema_x

@rule(model_filters={only_schema_x()})
def models_in_x_follow_naming_standard(model: Model) -> RuleViolation | None:
"""Models in schema X must follow the naming standard."""
if some_regex_fails(model.name):
return RuleViolation("Invalid model name.")
```
11 changes: 10 additions & 1 deletion src/dbt_score/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
"""Init dbt_score package."""

from dbt_score.model_filter import ModelFilter, model_filter
from dbt_score.models import Model
from dbt_score.rule import Rule, RuleViolation, Severity, rule

__all__ = ["Model", "Rule", "RuleViolation", "Severity", "rule"]
__all__ = [
"Model",
"ModelFilter",
"Rule",
"RuleViolation",
"Severity",
"model_filter",
"rule",
]
6 changes: 3 additions & 3 deletions src/dbt_score/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def evaluate(self) -> None:
self.results[model] = {}
for rule in rules:
try:
result: RuleViolation | None = rule.evaluate(model, **rule.config)
if rule.should_evaluate(model): # Consider model filter(s).
result = rule.evaluate(model, **rule.config)
self.results[model][rule.__class__] = result
except Exception as e:
self.results[model][rule.__class__] = e
else:
self.results[model][rule.__class__] = result

self.scores[model] = self._scorer.score_model(self.results[model])
self._formatter.model_evaluated(
Expand Down
115 changes: 115 additions & 0 deletions src/dbt_score/model_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Model filtering to choose when to apply specific rules."""

from typing import Any, Callable, Type, TypeAlias, overload

from dbt_score.models import Model

FilterEvaluationType: TypeAlias = Callable[[Model], bool]


class ModelFilter:
    """Base class for filters that decide whether a rule applies to a model.

    Subclasses must define the class attribute `description` and implement
    `evaluate`. Instances are hashable and compare equal when they come from
    the same filter class (same `source()`), so a set of filters keeps a
    single entry per filter.
    """

    # Human-readable explanation of the filter; mandatory on every subclass.
    description: str

    def __init__(self) -> None:
        """Initialize the filter."""

    def __init_subclass__(cls, **kwargs) -> None:  # type: ignore
        """Validate a newly created subclass.

        Raises:
            AttributeError: If the subclass does not define `description`.
        """
        super().__init_subclass__(**kwargs)
        if not hasattr(cls, "description"):
            raise AttributeError("Subclass must define class attribute `description`.")

    def evaluate(self, model: Model) -> bool:
        """Evaluate the filter against a model.

        Args:
            model: The model to check.

        Returns:
            `True` when the model should be evaluated by the rule, `False`
            when it should be ignored.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("Subclass must implement method `evaluate`.")

    @classmethod
    def source(cls) -> str:
        """Return the source of the filter, i.e. a fully qualified name."""
        return f"{cls.__module__}.{cls.__name__}"

    def __eq__(self, other: object) -> bool:
        """Compare filters by their source, consistently with `__hash__`.

        Without this, two instances of the same filter would share a hash but
        still be treated as distinct members of a set.
        """
        if not isinstance(other, ModelFilter):
            return NotImplemented
        return self.source() == other.source()

    def __hash__(self) -> int:
        """Compute a hash for the filter, derived from its source."""
        return hash(self.source())


# Use @overload to have proper typing for both @model_filter and @model_filter(...)
# https://mypy.readthedocs.io/en/stable/generics.html#decorator-factories


@overload
def model_filter(__func: FilterEvaluationType) -> Type[ModelFilter]:
    ...


@overload
def model_filter(
    *,
    description: str | None = None,
) -> Callable[[FilterEvaluationType], Type[ModelFilter]]:
    ...


def model_filter(
    __func: FilterEvaluationType | None = None,
    *,
    description: str | None = None,
) -> Type[ModelFilter] | Callable[[FilterEvaluationType], Type[ModelFilter]]:
    """Turn a plain function into a `ModelFilter` subclass.

    Both decorator forms are supported:

    - ``@model_filter``
    - ``@model_filter(description="...")``

    Args:
        __func: The filter evaluation function being decorated; only set when
            the bare ``@model_filter`` form is used.
        description: The description of the filter. When not given, the first
            line of the decorated function's docstring is used instead.

    Returns:
        The generated filter class for the bare form, otherwise a decorator
        that generates it.

    Raises:
        AttributeError: At decoration time, when neither `description` nor a
            docstring on the decorated function is available.
    """

    def decorator_filter(
        func: FilterEvaluationType,
    ) -> Type[ModelFilter]:
        """Build the filter class wrapping `func`."""
        docstring = func.__doc__
        if description is None and docstring is None:
            raise AttributeError(
                "ModelFilter must define `description` or `func.__doc__`."
            )

        # An explicit description wins; otherwise fall back to the first
        # line of the docstring.
        filter_description: str | None
        if description:
            filter_description = description
        elif docstring:
            filter_description = docstring.split("\n")[0]
        else:
            filter_description = None

        def wrapped_func(self: ModelFilter, *args: Any, **kwargs: Any) -> bool:
            """Wrap func to add `self`."""
            return func(*args, **kwargs)

        class_namespace: dict[str, Any] = {
            "description": filter_description,
            "evaluate": wrapped_func,
            # Keep a handle on the undecorated evaluation function.
            "_orig_evaluate": func,
            # Make the class report the decorated function's origin
            # (https://peps.python.org/pep-3155/).
            "__qualname__": func.__qualname__,
            "__module__": func.__module__,
        }

        # Dynamically create the subclass, named after the decorated function.
        return type(func.__name__, (ModelFilter,), class_namespace)

    if __func is None:
        # The syntax @model_filter(...) is used
        return decorator_filter

    # The syntax @model_filter is used
    return decorator_filter(__func)
32 changes: 30 additions & 2 deletions src/dbt_score/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import typing
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Callable, Type, TypeAlias, overload
from typing import Any, Callable, Iterable, Type, TypeAlias, overload

from dbt_score.model_filter import ModelFilter
from dbt_score.models import Model


Expand All @@ -24,6 +25,7 @@ class RuleConfig:

severity: Severity | None = None
config: dict[str, Any] = field(default_factory=dict)
model_filter_names: list[str] = field(default_factory=list)

@staticmethod
def from_dict(rule_config: dict[str, Any]) -> "RuleConfig":
Expand All @@ -34,8 +36,15 @@ def from_dict(rule_config: dict[str, Any]) -> "RuleConfig":
if "severity" in rule_config
else None
)
filter_names = (
config.pop("model_filter_names", None)
if "model_filter_names" in rule_config
else []
)

return RuleConfig(severity=severity, config=config)
return RuleConfig(
severity=severity, config=config, model_filter_names=filter_names
)


@dataclass
Expand All @@ -53,6 +62,8 @@ class Rule:

description: str
severity: Severity = Severity.MEDIUM
model_filter_names: list[str]
model_filters: frozenset[ModelFilter] = frozenset()
default_config: typing.ClassVar[dict[str, Any]] = {}

def __init__(self, rule_config: RuleConfig | None = None) -> None:
Expand Down Expand Up @@ -83,17 +94,30 @@ def process_config(self, rule_config: RuleConfig) -> None:
self.set_severity(
rule_config.severity
) if rule_config.severity else rule_config.severity
self.model_filter_names = rule_config.model_filter_names
self.config = config

def evaluate(self, model: Model) -> RuleViolation | None:
"""Evaluates the rule."""
raise NotImplementedError("Subclass must implement method `evaluate`.")

@classmethod
def should_evaluate(cls, model: Model) -> bool:
"""Checks if all filters in the rule allow evaluation."""
if cls.model_filters:
return all(f.evaluate(model) for f in cls.model_filters)
return True

@classmethod
def set_severity(cls, severity: Severity) -> None:
"""Set the severity of the rule."""
cls.severity = severity

@classmethod
def set_filters(cls, model_filters: Iterable[ModelFilter]) -> None:
"""Set the filters of the rule."""
cls.model_filters = frozenset(model_filters)

@classmethod
def source(cls) -> str:
"""Return the source of the rule, i.e. a fully qualified name."""
Expand All @@ -118,6 +142,7 @@ def rule(
*,
description: str | None = None,
severity: Severity = Severity.MEDIUM,
model_filters: set[ModelFilter] | None = None,
) -> Callable[[RuleEvaluationType], Type[Rule]]:
...

Expand All @@ -127,6 +152,7 @@ def rule(
*,
description: str | None = None,
severity: Severity = Severity.MEDIUM,
model_filters: set[ModelFilter] | None = None,
) -> Type[Rule] | Callable[[RuleEvaluationType], Type[Rule]]:
"""Rule decorator.
Expand All @@ -140,6 +166,7 @@ def rule(
__func: The rule evaluation function being decorated.
description: The description of the rule.
severity: The severity of the rule.
model_filters: Set of ModelFilter that filters the rule.
"""

def decorator_rule(
Expand Down Expand Up @@ -172,6 +199,7 @@ def wrapped_func(self: Rule, *args: Any, **kwargs: Any) -> RuleViolation | None:
{
"description": rule_description,
"severity": severity,
"model_filters": model_filters or frozenset(),
"default_config": default_config,
"evaluate": wrapped_func,
# Save provided evaluate function
Expand Down
Loading

0 comments on commit af26d7b

Please sign in to comment.