diff --git a/dbt_semantic_interfaces/type_enums/semantic_manifest_node_type.py b/dbt_semantic_interfaces/type_enums/semantic_manifest_node_type.py index dcf5bf75..0b5328b9 100644 --- a/dbt_semantic_interfaces/type_enums/semantic_manifest_node_type.py +++ b/dbt_semantic_interfaces/type_enums/semantic_manifest_node_type.py @@ -7,3 +7,4 @@ class SemanticManifestNodeType(ExtendedEnum): METRIC = "metric" SAVED_QUERY = "saved_query" SEMANTIC_MODEL = "semantic_model" + TIME_SPINE = "time_spine" diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index 01dfcdaf..3f7b540e 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -1,6 +1,7 @@ import traceback -from typing import Dict, Generic, List, Optional, Sequence, Set +from typing import Dict, Generic, List, Optional, Sequence, Set, Tuple +from dbt_semantic_interfaces.call_parameter_sets import FilterCallParameterSets from dbt_semantic_interfaces.errors import ParsingException from dbt_semantic_interfaces.implementations.metric import ( PydanticMetric, @@ -262,11 +263,37 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati class WhereFiltersAreParseable(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): """Validates that all Metric WhereFilters are parseable.""" + @staticmethod + def _validate_time_granularity_names( + context: MetricContext, + filter_expression_parameter_sets: Sequence[Tuple[str, FilterCallParameterSets]], + custom_granularity_names: List[str], + ) -> Sequence[ValidationIssue]: + issues: List[ValidationIssue] = [] + + valid_granularity_names = [ + standard_granularity.name for standard_granularity in TimeGranularity + ] + custom_granularity_names + for _, parameter_set in filter_expression_parameter_sets: + for time_dim_call_parameter_set in parameter_set.time_dimension_call_parameter_sets: + if not time_dim_call_parameter_set.time_granularity_name: + continue + if time_dim_call_parameter_set.time_granularity_name not in valid_granularity_names: + issues.append( + ValidationWarning( + context=context, + message=f"Filter for metric `{context.metric.metric_name}` is not valid. " + f"`{time_dim_call_parameter_set.time_granularity_name}` is not a valid granularity name. " + f"Valid granularity options: {valid_granularity_names}", + ) + ) + return issues + @staticmethod @validate_safely( whats_being_done="running model validation ensuring a metric's filter properties are configured properly" ) - def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D + def _validate_metric(metric: Metric, custom_granularity_names: List[str]) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] context = MetricContext( file_context=FileContext.from_metadata(metadata=metric.metadata), @@ -287,6 +314,12 @@ def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D }, ) ) + else: + issues += WhereFiltersAreParseable._validate_time_granularity_names( + context=context, + filter_expression_parameter_sets=metric.filter.filter_expression_parameter_sets, + custom_granularity_names=custom_granularity_names, + ) if metric.type_params: measure = metric.type_params.measure @@ -305,6 +338,12 @@ def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D }, ) ) + else: + issues += WhereFiltersAreParseable._validate_time_granularity_names( + context=context, + filter_expression_parameter_sets=measure.filter.filter_expression_parameter_sets, + custom_granularity_names=custom_granularity_names, + ) numerator = metric.type_params.numerator if numerator is not None and numerator.filter is not None: @@ -321,6 +360,12 @@ def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D }, ) ) + else: + issues += WhereFiltersAreParseable._validate_time_granularity_names( + context=context, + filter_expression_parameter_sets=numerator.filter.filter_expression_parameter_sets, + custom_granularity_names=custom_granularity_names, + ) denominator = metric.type_params.denominator if denominator is not None and denominator.filter is not None: @@ -337,6 +382,12 @@ def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D }, ) ) + else: + issues += WhereFiltersAreParseable._validate_time_granularity_names( + context=context, + filter_expression_parameter_sets=denominator.filter.filter_expression_parameter_sets, + custom_granularity_names=custom_granularity_names, + ) for input_metric in metric.type_params.metrics or []: if input_metric.filter is not None: @@ -354,15 +405,29 @@ def _validate_metric(metric: Metric) -> Sequence[ValidationIssue]: # noqa: D }, ) ) + else: + issues += WhereFiltersAreParseable._validate_time_granularity_names( + context=context, + filter_expression_parameter_sets=input_metric.filter.filter_expression_parameter_sets, + custom_granularity_names=custom_granularity_names, + ) + + # TODO: Are saved query filters being validated? Task: SL-2932 return issues @staticmethod @validate_safely(whats_being_done="running manifest validation ensuring all metric where filters are parseable") def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] - + custom_granularity_names = [ + granularity.name + for time_spine in semantic_manifest.project_configuration.time_spines + for granularity in time_spine.custom_granularities + ] for metric in semantic_manifest.metrics or []: - issues += WhereFiltersAreParseable._validate_metric(metric) + issues += WhereFiltersAreParseable._validate_metric( + metric=metric, custom_granularity_names=custom_granularity_names + ) return issues diff --git a/dbt_semantic_interfaces/validations/unique_valid_name.py b/dbt_semantic_interfaces/validations/unique_valid_name.py index 0ff533a1..203bd2d9 100644 --- a/dbt_semantic_interfaces/validations/unique_valid_name.py +++ b/dbt_semantic_interfaces/validations/unique_valid_name.py @@ -2,7 +2,7 @@ import enum import re -from typing import Dict, Generic, List, Optional, Sequence, Tuple, Union +from typing import Dict, Generic, List, Optional, Sequence, Set, Tuple, Union from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.protocols import ( @@ -102,12 +102,14 @@ def check_valid_name(name: str, context: Optional[ValidationContext] = None) -> @staticmethod @validate_safely(whats_being_done="checking semantic model sub element names are unique") - def _validate_semantic_model_elements(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_semantic_model_elements_and_time_spines(semantic_manifest: SemanticManifest) -> List[ValidationIssue]: issues: List[ValidationIssue] = [] - element_info_tuples: List[Tuple[ElementReference, str, ValidationContext]] = [] + custom_granularity_restricted_names_and_types: Dict[str, str] = {} - if semantic_model.measures: + for semantic_model in semantic_manifest.semantic_models: + element_info_tuples: List[Tuple[ElementReference, str, ValidationContext]] = [] for measure in semantic_model.measures: + custom_granularity_restricted_names_and_types[measure.name] = SemanticModelElementType.MEASURE.value element_info_tuples.append( ( measure.reference, @@ -121,8 +123,8 @@ def _validate_semantic_model_elements(semantic_model: SemanticModel) -> List[Val ), ) ) - if semantic_model.entities: for entity in semantic_model.entities: + custom_granularity_restricted_names_and_types[entity.name] = SemanticModelElementType.ENTITY.value element_info_tuples.append( ( entity.reference, @@ -136,8 +138,8 @@ def _validate_semantic_model_elements(semantic_model: SemanticModel) -> List[Val ), ) ) - if semantic_model.dimensions: for dimension in semantic_model.dimensions: + custom_granularity_restricted_names_and_types[dimension.name] = SemanticModelElementType.DIMENSION.value element_info_tuples.append( ( dimension.reference, @@ -151,22 +153,66 @@ def _validate_semantic_model_elements(semantic_model: SemanticModel) -> List[Val ), ) ) - name_to_type: Dict[ElementReference, str] = {} - for name, _type, context in element_info_tuples: - if name in name_to_type: - issues.append( - ValidationError( - context=context, - message=f"In semantic model `{semantic_model.name}`, can't use name `{name.element_name}` for " - f"a {_type} when it was already used for a {name_to_type[name]}", + # Verify uniqueness for this type within each semantic model + semantic_model_element_reference_to_type: Dict[ElementReference, str] = {} + for reference, _type, context in element_info_tuples: + if reference in semantic_model_element_reference_to_type: + issues.append( + ValidationError( + context=context, + message=f"In semantic model `{semantic_model.name}`, can't use name " + f"`{reference.element_name}` for a {_type} when it was already used for a " + f"{semantic_model_element_reference_to_type[reference]}", + ) ) + else: + semantic_model_element_reference_to_type[reference] = _type + + for name, _, context in element_info_tuples: + issues += UniqueAndValidNameRule.check_valid_name(name=name.element_name, context=context) + + for metric in semantic_manifest.metrics: + custom_granularity_restricted_names_and_types[metric.name] = SemanticManifestNodeType.METRIC.value + for semantic_model in semantic_manifest.semantic_models: + custom_granularity_restricted_names_and_types[ + semantic_model.name + ] = SemanticManifestNodeType.SEMANTIC_MODEL.value + + # Verify custom granularity names are unique across relevant elements + seen_custom_granularity_names: Set[str] = set() + duplicate_custom_granularity_names: Set[str] = set() + for time_spine in semantic_manifest.project_configuration.time_spines: + time_spine_context = ValidationIssueContext( + file_context=FileContext(), + object_name=time_spine.node_relation.alias, + object_type=SemanticManifestNodeType.TIME_SPINE.value, + ) + for custom_granularity in time_spine.custom_granularities: + issues += UniqueAndValidNameRule.check_valid_name( + name=custom_granularity.name, context=time_spine_context ) - else: - name_to_type[name] = _type + if custom_granularity.name in custom_granularity_restricted_names_and_types: + issues.append( + ValidationError( + context=time_spine_context, + message=f"Can't use name `{custom_granularity.name}` for a custom granularity when it was " + "already used for a " + f"{custom_granularity_restricted_names_and_types[custom_granularity.name]}.", + ) + ) + if custom_granularity.name in seen_custom_granularity_names: + duplicate_custom_granularity_names.add(custom_granularity.name) + seen_custom_granularity_names.add(custom_granularity.name) - for name, _, context in element_info_tuples: - issues += UniqueAndValidNameRule.check_valid_name(name=name.element_name, context=context) + if duplicate_custom_granularity_names: + issues.append( + ValidationError( + context=time_spine_context, + message=f"Custom granularity names must be unique, but found duplicate custom granularities with " + f"the names {duplicate_custom_granularity_names}.", + ) + ) return issues @@ -230,9 +276,7 @@ def _validate_top_level_objects(semantic_manifest: SemanticManifest) -> List[Val def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues = [] issues += UniqueAndValidNameRule._validate_top_level_objects(semantic_manifest=semantic_manifest) - - for semantic_model in semantic_manifest.semantic_models: - issues += UniqueAndValidNameRule._validate_semantic_model_elements(semantic_model=semantic_model) + issues += UniqueAndValidNameRule._validate_semantic_model_elements_and_time_spines(semantic_manifest) return issues diff --git a/pyproject.toml b/pyproject.toml index c7daf956..97553646 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dbt-semantic-interfaces" -version = "0.7.2.dev0" +version = "0.7.2" description = 'The shared semantic layer definitions that dbt-core and MetricFlow use' readme = "README.md" requires-python = ">=3.8" diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index e3c30331..d3b61ba4 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -450,6 +450,29 @@ def test_where_filter_validations_bad_input_metric_filter( # noqa: D validator.checked_validations(manifest) +def test_where_filter_validations_invalid_granularity( # noqa: D + simple_semantic_manifest__with_primary_transforms: PydanticSemanticManifest, +) -> None: + manifest = deepcopy(simple_semantic_manifest__with_primary_transforms) + + metric, _ = find_metric_with( + manifest, + lambda metric: metric.type_params is not None + and metric.type_params.metrics is not None + and len(metric.type_params.metrics) > 0, + ) + assert metric.type_params.metrics is not None + input_metric = metric.type_params.metrics[0] + input_metric.filter = PydanticWhereFilterIntersection( + where_filters=[PydanticWhereFilter(where_sql_template="{{ TimeDimension('metric_time', 'cool') }}")] + ) + validator = SemanticManifestValidator[PydanticSemanticManifest]([WhereFiltersAreParseable()]) + issues = validator.validate_semantic_manifest(manifest) + assert not issues.has_blocking_issues + assert len(issues.warnings) == 1 + assert "`cool` is not a valid granularity name" in issues.warnings[0].message + + def test_conversion_metrics() -> None: # noqa: D base_measure_name = "base_measure" conversion_measure_name = "conversion_measure" diff --git a/tests/validations/test_time_spines.py b/tests/validations/test_time_spines.py index 6c9d229e..822c41d7 100644 --- a/tests/validations/test_time_spines.py +++ b/tests/validations/test_time_spines.py @@ -4,6 +4,11 @@ ) from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure +from dbt_semantic_interfaces.implementations.metric import ( + PydanticMetric, + PydanticMetricInputMeasure, + PydanticMetricTypeParams, +) from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation from dbt_semantic_interfaces.implementations.project_configuration import ( PydanticProjectConfiguration, @@ -22,6 +27,7 @@ AggregationType, DimensionType, EntityType, + MetricType, TimeGranularity, ) from dbt_semantic_interfaces.validations.semantic_manifest_validator import ( @@ -182,3 +188,89 @@ def test_dimension_granularity_smaller_than_time_spine() -> None: # noqa: D "configuring a time spine at or below the smallest time dimension granularity is recommended" in issues.warnings[0].message ) + + +def test_time_spines_with_invalid_names() -> None: # noqa: D + semantic_manifest = PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="semantic_model", + measures=[ + PydanticMeasure( + name="sum_measure", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True + ) + ], + dimensions=[ + PydanticDimension( + name="dim", + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND), + ) + ], + entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)], + ), + ], + metrics=[ + PydanticMetric( + name="metric", + description=None, + type=MetricType.SIMPLE, + type_params=PydanticMetricTypeParams(measure=PydanticMetricInputMeasure(name="sum_measure")), + ), + ], + project_configuration=PydanticProjectConfiguration( + time_spine_table_configurations=[], + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.MONTH), + custom_granularities=[ + PydanticTimeSpineCustomGranularityColumn(name="retail_year"), + ], + ), + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY), + custom_granularities=[ + PydanticTimeSpineCustomGranularityColumn(name="retail_year"), + PydanticTimeSpineCustomGranularityColumn(name="quarter"), + PydanticTimeSpineCustomGranularityColumn(name="semantic_model"), + PydanticTimeSpineCustomGranularityColumn(name="sum_measure"), + PydanticTimeSpineCustomGranularityColumn(name="dim"), + PydanticTimeSpineCustomGranularityColumn(name="entity"), + PydanticTimeSpineCustomGranularityColumn(name="metric"), + PydanticTimeSpineCustomGranularityColumn(name="mYfUnCuStOmGrAnUlArItY"), + ], + ), + ], + ), + ) + issues = SemanticManifestValidator[PydanticSemanticManifest]().validate_semantic_manifest(semantic_manifest) + assert len(issues.warnings) == 1 + warning_msg = "To avoid unexpected query errors, configuring a time spine at or below the smallest" + assert issues.warnings[0].message.startswith(warning_msg) + + assert issues.has_blocking_issues + assert len(issues.errors) == 8 + error_messages = {err.message for err in issues.errors} + for msg in [ + ( + "Custom granularity names must be unique, but found duplicate custom granularities with the names " + "{'retail_year'}." + ), + "Can't use name `semantic_model` for a custom granularity when it was already used for a semantic_model.", + "Can't use name `sum_measure` for a custom granularity when it was already used for a measure.", + "Can't use name `dim` for a custom granularity when it was already used for a dimension.", + "Can't use name `entity` for a custom granularity when it was already used for a entity.", + "Can't use name `metric` for a custom granularity when it was already used for a metric.", + ( + "Invalid name `quarter` - names cannot match reserved time granularity keywords (['NANOSECOND', " + "'MICROSECOND', 'MILLISECOND', 'SECOND', 'MINUTE', 'HOUR', 'DAY', 'WEEK', 'MONTH', 'QUARTER', 'YEAR'])" + ), + ( + "Invalid name `mYfUnCuStOmGrAnUlArItY` - names may only contain lower case letters, numbers, and " + "underscores. Additionally, names must start with a lower case letter, cannot end with an underscore, " + "cannot contain dunders (double underscores, or __), and must be at least 2 characters long." + ), + ]: + assert msg in error_messages