Centralize Where filter validation + comments

This is not *exactly* what was described in Courtney's comments, but I think there was a little roughness in that plan once I began implementing (but I'm happy to change course if reviewers disagree!). This commit promotes the `WhereFiltersAreParseable` to be `WhereFiltersAreParseableRule` (a free-standing 'rule' in a separate file). It was weird to be passing things in from other classes but somehow centralizing the manifest, so instead, I just moved ALL of the relevant checks here. This moves the tests for where filters to a new file specifically for this rule (again, I'm open to the idea that this would be better just divided amongst the existing tests, but they share so much conceptually that it seems nice to group them together and to have a test that is pointed 1:1 at a rule where possible). Finally, I also moved some of the test support functions (`check_only_one_error_with_message`, for example) to `dbt_semantic_interfaces.test_utils` because they seem useful and reusable.
dbt-labs · Oct 28, 2024 · b7f84b8 · b7f84b8
1 parent c5fca35
commit b7f84b8
Show file tree

Hide file tree

Showing 9 changed files with 623 additions and 499 deletions.
diff --git a/.changes/unreleased/Under the Hood-20241023-180425.yaml b/.changes/unreleased/Under the Hood-20241023-180425.yaml
@@ -1,5 +1,5 @@
 kind: Under the Hood
-body: Added validation warnings for invalid time spines in where filters of saved queries.
+body: Added validation warnings for invalid granularity names in where filters of saved queries.
 time: 2024-10-23T18:04:25.235887-07:00
 custom:
     Author: theyostalservice

diff --git a/dbt_semantic_interfaces/test_utils.py b/dbt_semantic_interfaces/test_utils.py
@@ -25,6 +25,9 @@
 )
 from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
 from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity
+from dbt_semantic_interfaces.validations.validator_helpers import (
+    SemanticManifestValidationResults,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -169,3 +172,39 @@ def semantic_model_with_guaranteed_meta(
         dimensions=dimensions,
         metadata=metadata,
     )
+
+
+def check_only_one_error_with_message(  # noqa: D
+    results: SemanticManifestValidationResults, target_message: str
+) -> None:
+    assert len(results.warnings) == 0
+    assert len(results.errors) == 1
+    assert len(results.future_errors) == 0
+
+    found_match = results.errors[0].message.find(target_message) != -1
+    # Adding this dict to the assert so that when it does not match, pytest prints the expected and actual values.
+    assert {
+        "expected": target_message,
+        "actual": results.errors[0].message,
+    } and found_match
+
+
+def check_only_one_warning_with_message(  # noqa: D
+    results: SemanticManifestValidationResults, target_message: str
+) -> None:
+    assert len(results.errors) == 0
+    assert len(results.warnings) == 1
+    assert len(results.future_errors) == 0
+
+    found_match = results.warnings[0].message.find(target_message) != -1
+    # Adding this dict to the assert so that when it does not match, pytest prints the expected and actual values.
+    assert {
+        "expected": target_message,
+        "actual": results.warnings[0].message,
+    } and found_match
+
+
+def check_no_errors_or_warnings(results: SemanticManifestValidationResults) -> None:  # noqa: D
+    assert len(results.errors) == 0
+    assert len(results.warnings) == 0
+    assert len(results.future_errors) == 0
diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py
@@ -1,7 +1,6 @@
 import traceback
-from typing import Dict, Generic, List, Optional, Sequence, Tuple
+from typing import Dict, Generic, List, Optional, Sequence
 
-from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets
 from dbt_semantic_interfaces.errors import ParsingException
 from dbt_semantic_interfaces.implementations.metric import (
     PydanticMetric,
@@ -35,7 +34,6 @@
     ValidationError,
     ValidationIssue,
     ValidationWarning,
-    generate_exception_issue,
     validate_safely,
 )
 
@@ -244,177 +242,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
         return issues
 
 
-class WhereFiltersAreParseable(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
-    """Validates that all Metric WhereFilters are parseable."""
-
-    @staticmethod
-    def _validate_time_granularity_names(
-        context: MetricContext,
-        filter_expression_parameter_sets: Sequence[Tuple[str, FilterCallParameterSets]],
-        custom_granularity_names: List[str],
-    ) -> Sequence[ValidationIssue]:
-        issues: List[ValidationIssue] = []
-
-        valid_granularity_names = [
-            standard_granularity.value for standard_granularity in TimeGranularity
-        ] + custom_granularity_names
-        for _, parameter_set in filter_expression_parameter_sets:
-            for time_dim_call_parameter_set in parameter_set.time_dimension_call_parameter_sets:
-                if not time_dim_call_parameter_set.time_granularity_name:
-                    continue
-                if time_dim_call_parameter_set.time_granularity_name.lower() not in valid_granularity_names:
-                    issues.append(
-                        ValidationWarning(
-                            context=context,
-                            message=f"Filter for metric `{context.metric.metric_name}` is not valid. "
-                            f"`{time_dim_call_parameter_set.time_granularity_name}` is not a valid granularity name. "
-                            f"Valid granularity options: {valid_granularity_names}",
-                        )
-                    )
-        return issues
-
-    @staticmethod
-    @validate_safely(
-        whats_being_done="running model validation ensuring a metric's filter properties are configured properly"
-    )
-    def _validate_metric(metric: Metric, custom_granularity_names: List[str]) -> Sequence[ValidationIssue]:  # noqa: D
-        issues: List[ValidationIssue] = []
-        context = MetricContext(
-            file_context=FileContext.from_metadata(metadata=metric.metadata),
-            metric=MetricModelReference(metric_name=metric.name),
-        )
-
-        if metric.filter is not None:
-            try:
-                metric.filter.filter_expression_parameter_sets
-            except Exception as e:
-                issues.append(
-                    generate_exception_issue(
-                        what_was_being_done=f"trying to parse filter of metric `{metric.name}`",
-                        e=e,
-                        context=context,
-                        extras={
-                            "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                        },
-                    )
-                )
-            else:
-                issues += WhereFiltersAreParseable._validate_time_granularity_names(
-                    context=context,
-                    filter_expression_parameter_sets=metric.filter.filter_expression_parameter_sets,
-                    custom_granularity_names=custom_granularity_names,
-                )
-
-        if metric.type_params:
-            measure = metric.type_params.measure
-            if measure is not None and measure.filter is not None:
-                try:
-                    measure.filter.filter_expression_parameter_sets
-                except Exception as e:
-                    issues.append(
-                        generate_exception_issue(
-                            what_was_being_done=f"trying to parse filter of measure input `{measure.name}` "
-                            f"on metric `{metric.name}`",
-                            e=e,
-                            context=context,
-                            extras={
-                                "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                            },
-                        )
-                    )
-                else:
-                    issues += WhereFiltersAreParseable._validate_time_granularity_names(
-                        context=context,
-                        filter_expression_parameter_sets=measure.filter.filter_expression_parameter_sets,
-                        custom_granularity_names=custom_granularity_names,
-                    )
-
-            numerator = metric.type_params.numerator
-            if numerator is not None and numerator.filter is not None:
-                try:
-                    numerator.filter.filter_expression_parameter_sets
-                except Exception as e:
-                    issues.append(
-                        generate_exception_issue(
-                            what_was_being_done=f"trying to parse the numerator filter on metric `{metric.name}`",
-                            e=e,
-                            context=context,
-                            extras={
-                                "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                            },
-                        )
-                    )
-                else:
-                    issues += WhereFiltersAreParseable._validate_time_granularity_names(
-                        context=context,
-                        filter_expression_parameter_sets=numerator.filter.filter_expression_parameter_sets,
-                        custom_granularity_names=custom_granularity_names,
-                    )
-
-            denominator = metric.type_params.denominator
-            if denominator is not None and denominator.filter is not None:
-                try:
-                    denominator.filter.filter_expression_parameter_sets
-                except Exception as e:
-                    issues.append(
-                        generate_exception_issue(
-                            what_was_being_done=f"trying to parse the denominator filter on metric `{metric.name}`",
-                            e=e,
-                            context=context,
-                            extras={
-                                "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                            },
-                        )
-                    )
-                else:
-                    issues += WhereFiltersAreParseable._validate_time_granularity_names(
-                        context=context,
-                        filter_expression_parameter_sets=denominator.filter.filter_expression_parameter_sets,
-                        custom_granularity_names=custom_granularity_names,
-                    )
-
-            for input_metric in metric.type_params.metrics or []:
-                if input_metric.filter is not None:
-                    try:
-                        input_metric.filter.filter_expression_parameter_sets
-                    except Exception as e:
-                        issues.append(
-                            generate_exception_issue(
-                                what_was_being_done=f"trying to parse filter for input metric `{input_metric.name}` "
-                                f"on metric `{metric.name}`",
-                                e=e,
-                                context=context,
-                                extras={
-                                    "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                                },
-                            )
-                        )
-                    else:
-                        issues += WhereFiltersAreParseable._validate_time_granularity_names(
-                            context=context,
-                            filter_expression_parameter_sets=input_metric.filter.filter_expression_parameter_sets,
-                            custom_granularity_names=custom_granularity_names,
-                        )
-
-            # TODO: Are saved query filters being validated? Task: SL-2932
-        return issues
-
-    @staticmethod
-    @validate_safely(whats_being_done="running manifest validation ensuring all metric where filters are parseable")
-    def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:  # noqa: D
-        issues: List[ValidationIssue] = []
-        custom_granularity_names = [
-            granularity.name
-            for time_spine in semantic_manifest.project_configuration.time_spines
-            for granularity in time_spine.custom_granularities
-        ]
-        for metric in semantic_manifest.metrics or []:
-            issues += WhereFiltersAreParseable._validate_metric(
-                metric=metric, custom_granularity_names=custom_granularity_names
-            )
-        return issues
-
-
 class ConversionMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
     """Checks that conversion metrics are configured properly."""
 

diff --git a/dbt_semantic_interfaces/validations/saved_query.py b/dbt_semantic_interfaces/validations/saved_query.py
@@ -23,15 +23,13 @@
 )
 from dbt_semantic_interfaces.protocols import SemanticManifestT
 from dbt_semantic_interfaces.protocols.saved_query import SavedQuery
-from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
 from dbt_semantic_interfaces.validations.validator_helpers import (
     FileContext,
     SavedQueryContext,
     SavedQueryElementType,
     SemanticManifestValidationRule,
     ValidationError,
     ValidationIssue,
-    ValidationWarning,
     generate_exception_issue,
     validate_safely,
 )
@@ -114,68 +112,6 @@ def _check_metrics(valid_metric_names: Set[str], saved_query: SavedQuery) -> Seq
                 )
         return issues
 
-    @staticmethod
-    @validate_safely("Validate the where field in a saved query.")
-    def _check_where(saved_query: SavedQuery, custom_granularity_names: list[str]) -> Sequence[ValidationIssue]:
-        issues: List[ValidationIssue] = []
-        if saved_query.query_params.where is None:
-            return issues
-        for where_filter in saved_query.query_params.where.where_filters:
-            try:
-                where_filter.call_parameter_sets
-            except Exception as e:
-                issues.append(
-                    generate_exception_issue(
-                        what_was_being_done=f"trying to parse a filter in saved query `{saved_query.name}`",
-                        e=e,
-                        context=SavedQueryContext(
-                            file_context=FileContext.from_metadata(metadata=saved_query.metadata),
-                            element_type=SavedQueryElementType.WHERE,
-                            element_value=where_filter.where_sql_template,
-                        ),
-                        extras={
-                            "traceback": "".join(traceback.format_tb(e.__traceback__)),
-                        },
-                    )
-                )
-            else:
-                issues += SavedQueryRule._check_where_timespine(saved_query, custom_granularity_names)
-
-        return issues
-
-    def _check_where_timespine(
-        saved_query: SavedQuery, custom_granularity_names: list[str]
-    ) -> Sequence[ValidationIssue]:
-        where_param = saved_query.query_params.where
-        if where_param is None:
-            return []
-
-        issues: List[ValidationIssue] = []
-
-        valid_granularity_names = [
-            standard_granularity.name for standard_granularity in TimeGranularity
-        ] + custom_granularity_names
-
-        for where_filter in where_param.where_filters:
-            for time_dim_call_parameter_set in where_filter.call_parameter_sets.time_dimension_call_parameter_sets:
-                if not time_dim_call_parameter_set.time_granularity_name:
-                    continue
-                if time_dim_call_parameter_set.time_granularity_name not in valid_granularity_names:
-                    issues.append(
-                        ValidationWarning(
-                            context=SavedQueryContext(
-                                file_context=FileContext.from_metadata(metadata=saved_query.metadata),
-                                element_type=SavedQueryElementType.WHERE,
-                                element_value=where_filter.where_sql_template,
-                            ),
-                            # message=f"Filter for metric `{context.metric.metric_name}` is not valid. "
-                            message=f"Filter for saved query `{saved_query.name}` is not valid. "
-                            f"`{time_dim_call_parameter_set.time_granularity_name}` is not a valid granularity name. "
-                            f"Valid granularity options: {valid_granularity_names}",
-                        )
-                    )
-        return issues
-
     @staticmethod
     def _parse_query_item(
         saved_query: SavedQuery,
@@ -317,11 +253,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
             for entity in semantic_model.entities:
                 valid_group_by_element_names.add(entity.name)
 
-        custom_granularity_names = [
-            granularity.name
-            for time_spine in semantic_manifest.project_configuration.time_spines
-            for granularity in time_spine.custom_granularities
-        ]
         for saved_query in semantic_manifest.saved_queries:
             issues += SavedQueryRule._check_metrics(
                 valid_metric_names=valid_metric_names,
@@ -331,7 +262,6 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
                 valid_group_by_element_names=valid_group_by_element_names,
                 saved_query=saved_query,
             )
-            issues += SavedQueryRule._check_where(saved_query, custom_granularity_names)
             issues += SavedQueryRule._check_order_by(saved_query)
             issues += SavedQueryRule._check_limit(saved_query)
         return issues

diff --git a/dbt_semantic_interfaces/validations/semantic_manifest_validator.py b/dbt_semantic_interfaces/validations/semantic_manifest_validator.py
@@ -27,7 +27,6 @@
     ConversionMetricRule,
     CumulativeMetricRule,
     DerivedMetricRule,
-    WhereFiltersAreParseable,
 )
 from dbt_semantic_interfaces.validations.non_empty import NonEmptyRule
 from dbt_semantic_interfaces.validations.primary_entity import PrimaryEntityRule
@@ -47,6 +46,9 @@
     SemanticManifestValidationResults,
     SemanticManifestValidationRule,
 )
+from dbt_semantic_interfaces.validations.where_filters import (
+    WhereFiltersAreParseableRule,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -86,7 +88,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
         SemanticModelDefaultsRule[SemanticManifestT](),
         PrimaryEntityRule[SemanticManifestT](),
         PrimaryEntityDimensionPairs[SemanticManifestT](),
-        WhereFiltersAreParseable[SemanticManifestT](),
+        WhereFiltersAreParseableRule[SemanticManifestT](),
         SavedQueryRule[SemanticManifestT](),
         MetricLabelsRule[SemanticManifestT](),
         SemanticModelLabelsRule[SemanticManifestT](),