Skip to content

Commit

Permalink
Centralize Where filter validation + comments
Browse files Browse the repository at this point in the history
This is not *exactly* what was described in Courtney's comments, but I
think there was a little roughness in that plan once I began implementing
(but I'm happy to change course if reviewers disagree!).

This commit promotes the `WhereFiltersAreParseable` to be
`WhereFiltersAreParseableRule` (a free-standing 'rule' in a separate
file).  It was weird to be passing things in from other classes but
somehow centralizing the manifest, so instead, I just moved ALL of
the relevant checks here.

This moves the tests for where filters to a new file specifically
for this rule (again, I'm open to the idea that this would be better
just divided amongst the existing tests, but they share so much
conceptually that it seems nice to group them together and to have
a test that is pointed 1:1 at a rule where possible).

Finally, I also moved some of the test support functions
(`check_only_one_error_with_message`, for example) to
`dbt_semantic_interfaces.test_utils` because they seem useful and
reusable.
  • Loading branch information
theyostalservice committed Oct 28, 2024
1 parent c5fca35 commit b7f84b8
Show file tree
Hide file tree
Showing 9 changed files with 623 additions and 499 deletions.
2 changes: 1 addition & 1 deletion .changes/unreleased/Under the Hood-20241023-180425.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
kind: Under the Hood
body: Added validation warnings for invalid time spines in where filters of saved queries.
body: Added validation warnings for invalid granularity names in where filters of saved queries.
time: 2024-10-23T18:04:25.235887-07:00
custom:
Author: theyostalservice
Expand Down
39 changes: 39 additions & 0 deletions dbt_semantic_interfaces/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
)
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity
from dbt_semantic_interfaces.validations.validator_helpers import (
SemanticManifestValidationResults,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -169,3 +172,39 @@ def semantic_model_with_guaranteed_meta(
dimensions=dimensions,
metadata=metadata,
)


def check_only_one_error_with_message( # noqa: D
results: SemanticManifestValidationResults, target_message: str
) -> None:
assert len(results.warnings) == 0
assert len(results.errors) == 1
assert len(results.future_errors) == 0

found_match = results.errors[0].message.find(target_message) != -1
# Adding this dict to the assert so that when it does not match, pytest prints the expected and actual values.
assert {
"expected": target_message,
"actual": results.errors[0].message,
} and found_match


def check_only_one_warning_with_message( # noqa: D
results: SemanticManifestValidationResults, target_message: str
) -> None:
assert len(results.errors) == 0
assert len(results.warnings) == 1
assert len(results.future_errors) == 0

found_match = results.warnings[0].message.find(target_message) != -1
# Adding this dict to the assert so that when it does not match, pytest prints the expected and actual values.
assert {
"expected": target_message,
"actual": results.warnings[0].message,
} and found_match


def check_no_errors_or_warnings(results: SemanticManifestValidationResults) -> None: # noqa: D
assert len(results.errors) == 0
assert len(results.warnings) == 0
assert len(results.future_errors) == 0
175 changes: 1 addition & 174 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import traceback
from typing import Dict, Generic, List, Optional, Sequence, Tuple
from typing import Dict, Generic, List, Optional, Sequence

from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
Expand Down Expand Up @@ -35,7 +34,6 @@
ValidationError,
ValidationIssue,
ValidationWarning,
generate_exception_issue,
validate_safely,
)

Expand Down Expand Up @@ -244,177 +242,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
return issues


class WhereFiltersAreParseable(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Validates that all Metric WhereFilters are parseable."""

@staticmethod
def _validate_time_granularity_names(
context: MetricContext,
filter_expression_parameter_sets: Sequence[Tuple[str, FilterCallParameterSets]],
custom_granularity_names: List[str],
) -> Sequence[ValidationIssue]:
issues: List[ValidationIssue] = []

valid_granularity_names = [
standard_granularity.value for standard_granularity in TimeGranularity
] + custom_granularity_names
for _, parameter_set in filter_expression_parameter_sets:
for time_dim_call_parameter_set in parameter_set.time_dimension_call_parameter_sets:
if not time_dim_call_parameter_set.time_granularity_name:
continue
if time_dim_call_parameter_set.time_granularity_name.lower() not in valid_granularity_names:
issues.append(
ValidationWarning(
context=context,
message=f"Filter for metric `{context.metric.metric_name}` is not valid. "
f"`{time_dim_call_parameter_set.time_granularity_name}` is not a valid granularity name. "
f"Valid granularity options: {valid_granularity_names}",
)
)
return issues

@staticmethod
@validate_safely(
whats_being_done="running model validation ensuring a metric's filter properties are configured properly"
)
def _validate_metric(metric: Metric, custom_granularity_names: List[str]) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []
context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)

if metric.filter is not None:
try:
metric.filter.filter_expression_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse filter of metric `{metric.name}`",
e=e,
context=context,
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += WhereFiltersAreParseable._validate_time_granularity_names(
context=context,
filter_expression_parameter_sets=metric.filter.filter_expression_parameter_sets,
custom_granularity_names=custom_granularity_names,
)

if metric.type_params:
measure = metric.type_params.measure
if measure is not None and measure.filter is not None:
try:
measure.filter.filter_expression_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse filter of measure input `{measure.name}` "
f"on metric `{metric.name}`",
e=e,
context=context,
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += WhereFiltersAreParseable._validate_time_granularity_names(
context=context,
filter_expression_parameter_sets=measure.filter.filter_expression_parameter_sets,
custom_granularity_names=custom_granularity_names,
)

numerator = metric.type_params.numerator
if numerator is not None and numerator.filter is not None:
try:
numerator.filter.filter_expression_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse the numerator filter on metric `{metric.name}`",
e=e,
context=context,
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += WhereFiltersAreParseable._validate_time_granularity_names(
context=context,
filter_expression_parameter_sets=numerator.filter.filter_expression_parameter_sets,
custom_granularity_names=custom_granularity_names,
)

denominator = metric.type_params.denominator
if denominator is not None and denominator.filter is not None:
try:
denominator.filter.filter_expression_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse the denominator filter on metric `{metric.name}`",
e=e,
context=context,
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += WhereFiltersAreParseable._validate_time_granularity_names(
context=context,
filter_expression_parameter_sets=denominator.filter.filter_expression_parameter_sets,
custom_granularity_names=custom_granularity_names,
)

for input_metric in metric.type_params.metrics or []:
if input_metric.filter is not None:
try:
input_metric.filter.filter_expression_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse filter for input metric `{input_metric.name}` "
f"on metric `{metric.name}`",
e=e,
context=context,
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += WhereFiltersAreParseable._validate_time_granularity_names(
context=context,
filter_expression_parameter_sets=input_metric.filter.filter_expression_parameter_sets,
custom_granularity_names=custom_granularity_names,
)

# TODO: Are saved query filters being validated? Task: SL-2932
return issues

@staticmethod
@validate_safely(whats_being_done="running manifest validation ensuring all metric where filters are parseable")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []
custom_granularity_names = [
granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]
for metric in semantic_manifest.metrics or []:
issues += WhereFiltersAreParseable._validate_metric(
metric=metric, custom_granularity_names=custom_granularity_names
)
return issues


class ConversionMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that conversion metrics are configured properly."""

Expand Down
70 changes: 0 additions & 70 deletions dbt_semantic_interfaces/validations/saved_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,13 @@
)
from dbt_semantic_interfaces.protocols import SemanticManifestT
from dbt_semantic_interfaces.protocols.saved_query import SavedQuery
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
SavedQueryContext,
SavedQueryElementType,
SemanticManifestValidationRule,
ValidationError,
ValidationIssue,
ValidationWarning,
generate_exception_issue,
validate_safely,
)
Expand Down Expand Up @@ -114,68 +112,6 @@ def _check_metrics(valid_metric_names: Set[str], saved_query: SavedQuery) -> Seq
)
return issues

@staticmethod
@validate_safely("Validate the where field in a saved query.")
def _check_where(saved_query: SavedQuery, custom_granularity_names: list[str]) -> Sequence[ValidationIssue]:
issues: List[ValidationIssue] = []
if saved_query.query_params.where is None:
return issues
for where_filter in saved_query.query_params.where.where_filters:
try:
where_filter.call_parameter_sets
except Exception as e:
issues.append(
generate_exception_issue(
what_was_being_done=f"trying to parse a filter in saved query `{saved_query.name}`",
e=e,
context=SavedQueryContext(
file_context=FileContext.from_metadata(metadata=saved_query.metadata),
element_type=SavedQueryElementType.WHERE,
element_value=where_filter.where_sql_template,
),
extras={
"traceback": "".join(traceback.format_tb(e.__traceback__)),
},
)
)
else:
issues += SavedQueryRule._check_where_timespine(saved_query, custom_granularity_names)

return issues

def _check_where_timespine(
saved_query: SavedQuery, custom_granularity_names: list[str]
) -> Sequence[ValidationIssue]:
where_param = saved_query.query_params.where
if where_param is None:
return []

issues: List[ValidationIssue] = []

valid_granularity_names = [
standard_granularity.name for standard_granularity in TimeGranularity
] + custom_granularity_names

for where_filter in where_param.where_filters:
for time_dim_call_parameter_set in where_filter.call_parameter_sets.time_dimension_call_parameter_sets:
if not time_dim_call_parameter_set.time_granularity_name:
continue
if time_dim_call_parameter_set.time_granularity_name not in valid_granularity_names:
issues.append(
ValidationWarning(
context=SavedQueryContext(
file_context=FileContext.from_metadata(metadata=saved_query.metadata),
element_type=SavedQueryElementType.WHERE,
element_value=where_filter.where_sql_template,
),
# message=f"Filter for metric `{context.metric.metric_name}` is not valid. "
message=f"Filter for saved query `{saved_query.name}` is not valid. "
f"`{time_dim_call_parameter_set.time_granularity_name}` is not a valid granularity name. "
f"Valid granularity options: {valid_granularity_names}",
)
)
return issues

@staticmethod
def _parse_query_item(
saved_query: SavedQuery,
Expand Down Expand Up @@ -317,11 +253,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
for entity in semantic_model.entities:
valid_group_by_element_names.add(entity.name)

custom_granularity_names = [
granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]
for saved_query in semantic_manifest.saved_queries:
issues += SavedQueryRule._check_metrics(
valid_metric_names=valid_metric_names,
Expand All @@ -331,7 +262,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
valid_group_by_element_names=valid_group_by_element_names,
saved_query=saved_query,
)
issues += SavedQueryRule._check_where(saved_query, custom_granularity_names)
issues += SavedQueryRule._check_order_by(saved_query)
issues += SavedQueryRule._check_limit(saved_query)
return issues
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
ConversionMetricRule,
CumulativeMetricRule,
DerivedMetricRule,
WhereFiltersAreParseable,
)
from dbt_semantic_interfaces.validations.non_empty import NonEmptyRule
from dbt_semantic_interfaces.validations.primary_entity import PrimaryEntityRule
Expand All @@ -47,6 +46,9 @@
SemanticManifestValidationResults,
SemanticManifestValidationRule,
)
from dbt_semantic_interfaces.validations.where_filters import (
WhereFiltersAreParseableRule,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -86,7 +88,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
SemanticModelDefaultsRule[SemanticManifestT](),
PrimaryEntityRule[SemanticManifestT](),
PrimaryEntityDimensionPairs[SemanticManifestT](),
WhereFiltersAreParseable[SemanticManifestT](),
WhereFiltersAreParseableRule[SemanticManifestT](),
SavedQueryRule[SemanticManifestT](),
MetricLabelsRule[SemanticManifestT](),
SemanticModelLabelsRule[SemanticManifestT](),
Expand Down
Loading

0 comments on commit b7f84b8

Please sign in to comment.